1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 char *source_path_with_name = NULL; 39 40 // Check if NULL qf passed in 41 if (!qf) return CEED_ERROR_SUCCESS; 42 43 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 44 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 45 46 if (qf->source_path) { 47 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 48 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 49 memcpy(source_path_with_name, qf->source_path, path_len); 50 memcpy(&source_path_with_name[path_len], ":", 1); 51 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 52 } else { 53 CeedCall(CeedCalloc(1, &source_path_with_name)); 54 } 55 56 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 57 { 58 CeedQFunctionContext ctx; 59 60 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 61 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 62 } 63 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 64 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 65 } 66 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 67 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 68 } 69 CeedCall(CeedFree(&source_path_with_name)); 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 Ceed ceed_fallback; 84 bool is_composite; 85 CeedOperator op_fallback; 86 87 // Check not already created 88 if (op->op_fallback) return CEED_ERROR_SUCCESS; 89 90 // Fallback Ceed 91 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 92 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 93 94 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 95 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 96 97 // Clone Op 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 117 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 118 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 119 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 120 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 121 op->input_fields[i]->vec)); 122 } 123 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 124 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 125 op->output_fields[i]->vec)); 126 } 127 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 128 if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 // Cleanup 130 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 131 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 133 } 134 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 135 CeedCall(CeedOperatorCheckReady(op_fallback)); 136 // Note: No ref-counting here so we don't get caught in a reference loop. 137 // The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback. 138 op->op_fallback = op_fallback; 139 op_fallback->op_fallback_parent = op; 140 return CEED_ERROR_SUCCESS; 141 } 142 143 /** 144 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 145 146 @param[in] op CeedOperator to retrieve fallback for 147 @param[out] op_fallback Fallback CeedOperator 148 149 @return An error code: 0 - success, otherwise - failure 150 151 @ref Developer 152 **/ 153 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 154 // Create if needed 155 if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op)); 156 if (op->op_fallback) { 157 bool is_debug; 158 159 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 160 if (is_debug) { 161 Ceed ceed, ceed_fallback; 162 const char *resource, *resource_fallback; 163 164 CeedCall(CeedOperatorGetCeed(op, &ceed)); 165 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 166 CeedCall(CeedGetResource(ceed, &resource)); 167 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 168 169 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 170 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 171 op->op_fallback); 172 } 173 } 174 *op_fallback = op->op_fallback; 175 return CEED_ERROR_SUCCESS; 176 } 177 178 /** 179 @brief Get the parent CeedOperator for a fallback CeedOperator 180 181 @param[in] op CeedOperator context 182 @param[out] parent Variable to store parent CeedOperator context 183 184 @return An error code: 0 - success, otherwise - failure 185 186 @ref Developer 187 **/ 188 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) { 189 *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL; 190 return CEED_ERROR_SUCCESS; 191 } 192 193 /** 194 @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator 195 196 @param[in] op CeedOperator context 197 @param[out] parent Variable to store parent Ceed context 198 199 @return An error code: 0 - success, otherwise - failure 200 201 @ref Developer 202 **/ 203 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) { 204 *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed; 205 return CEED_ERROR_SUCCESS; 206 } 207 208 /** 209 @brief Select correct basis matrix pointer based on CeedEvalMode 210 211 @param[in] basis CeedBasis from which to get the basis matrix 212 @param[in] eval_mode Current basis evaluation mode 213 @param[in] identity Pointer to identity matrix 214 @param[out] basis_ptr Basis pointer to set 215 216 @ref Developer 217 **/ 218 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 219 switch (eval_mode) { 220 case CEED_EVAL_NONE: 221 *basis_ptr = identity; 222 break; 223 case CEED_EVAL_INTERP: 224 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 225 break; 226 case CEED_EVAL_GRAD: 227 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 228 break; 229 case CEED_EVAL_DIV: 230 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 231 break; 232 case CEED_EVAL_CURL: 233 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 234 break; 235 case CEED_EVAL_WEIGHT: 236 break; // Caught by QF Assembly 237 } 238 assert(*basis_ptr != NULL); 239 return CEED_ERROR_SUCCESS; 240 } 241 242 /** 243 @brief Core logic for assembling operator diagonal or point block diagonal 244 245 @param[in] op CeedOperator to assemble point block diagonal 246 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 247 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 248 @param[out] assembled CeedVector to store assembled diagonal 249 250 @return An error code: 0 - success, otherwise - failure 251 252 @ref Developer 253 **/ 254 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) { 255 Ceed ceed; 256 bool is_composite; 257 258 CeedCall(CeedOperatorGetCeed(op, &ceed)); 259 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 260 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 261 262 // Assemble QFunction 263 CeedInt layout_qf[3]; 264 const CeedScalar *assembled_qf_array; 265 CeedVector assembled_qf = NULL; 266 CeedElemRestriction assembled_elem_rstr = NULL; 267 268 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 269 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 270 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 271 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 272 273 // Get assembly data 274 const CeedEvalMode **eval_modes_in, **eval_modes_out; 275 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 276 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 277 CeedBasis *active_bases_in, *active_bases_out; 278 CeedElemRestriction *active_elem_rstrs_in, *active_elem_rstrs_out; 279 CeedOperatorAssemblyData data; 280 281 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 282 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 283 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, 284 &num_output_components)); 285 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL)); 286 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out)); 287 288 CeedCheck(num_active_bases_in == num_active_bases_out, ceed, CEED_ERROR_UNSUPPORTED, 289 "Cannot assemble operator diagonal with different numbers of input and output active bases"); 290 291 // Loop over all active bases 292 for (CeedInt b = 0; b < num_active_bases_in; b++) { 293 bool has_eval_none = false; 294 CeedInt num_elem, num_nodes, num_qpts, num_comp; 295 CeedScalar *elem_diag_array, *identity = NULL; 296 CeedVector elem_diag; 297 CeedElemRestriction diag_elem_rstr; 298 299 CeedCheck(active_elem_rstrs_in[b] == active_elem_rstrs_out[b], ceed, CEED_ERROR_UNSUPPORTED, 300 "Cannot assemble operator diagonal with different input and output active element restrictions"); 301 302 // Assemble point block diagonal restriction, if needed 303 if (is_point_block) { 304 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b], &diag_elem_rstr)); 305 } else { 306 CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b], &diag_elem_rstr)); 307 } 308 309 // Create diagonal vector 310 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 311 312 // Assemble element operator diagonals 313 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 314 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 315 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 316 CeedCall(CeedBasisGetNumNodes(active_bases_in[b], &num_nodes)); 317 CeedCall(CeedBasisGetNumComponents(active_bases_in[b], &num_comp)); 318 if (active_bases_in[b] == CEED_BASIS_NONE) num_qpts = num_nodes; 319 else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b], &num_qpts)); 320 321 if (active_bases_in[b] != active_bases_out[b]) { 322 CeedInt num_nodes_out, num_qpts_out, num_comp_out; 323 324 CeedCall(CeedBasisGetNumNodes(active_bases_out[b], &num_nodes_out)); 325 CeedCheck(num_nodes == num_nodes_out, ceed, CEED_ERROR_UNSUPPORTED, "Active input and output bases must have the same number of nodes"); 326 CeedCall(CeedBasisGetNumComponents(active_bases_out[b], &num_comp_out)); 327 CeedCheck(num_comp == num_comp_out, ceed, CEED_ERROR_UNSUPPORTED, "Active input and output bases must have the same number of components"); 328 if (active_bases_out[b] == CEED_BASIS_NONE) num_qpts_out = num_nodes_out; 329 else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_out[b], &num_qpts_out)); 330 CeedCheck(num_qpts == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED, 331 "Active input and output bases must have the same number of quadrature points"); 332 } 333 334 // Construct identity matrix for basis if required 335 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 336 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 337 } 338 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 339 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 340 } 341 if (has_eval_none) { 342 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 343 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 344 } 345 346 // Compute the diagonal of B^T D B 347 // Each element 348 for (CeedSize e = 0; e < num_elem; e++) { 349 // Each basis eval mode pair 350 CeedInt d_out = 0, q_comp_out; 351 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 352 353 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 354 CeedInt d_in = 0, q_comp_in; 355 const CeedScalar *B_t = NULL; 356 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 357 358 CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b], eval_modes_out[b][e_out], identity, &B_t)); 359 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b], eval_modes_out[b][e_out], &q_comp_out)); 360 if (q_comp_out > 1) { 361 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 362 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 363 } 364 eval_mode_out_prev = eval_modes_out[b][e_out]; 365 366 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 367 const CeedScalar *B = NULL; 368 369 CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b], eval_modes_in[b][e_in], identity, &B)); 370 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b], eval_modes_in[b][e_in], &q_comp_in)); 371 if (q_comp_in > 1) { 372 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 373 else B = &B[(++d_in) * num_qpts * num_nodes]; 374 } 375 eval_mode_in_prev = eval_modes_in[b][e_in]; 376 377 // Each component 378 for (CeedInt c_out = 0; c_out < num_comp; c_out++) { 379 // Each qpt/node pair 380 for (CeedInt q = 0; q < num_qpts; q++) { 381 if (is_point_block) { 382 // Point Block Diagonal 383 for (CeedInt c_in = 0; c_in < num_comp; c_in++) { 384 const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 385 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 386 387 for (CeedInt n = 0; n < num_nodes; n++) { 388 elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] += 389 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 390 } 391 } 392 } else { 393 // Diagonal Only 394 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 395 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 396 397 for (CeedInt n = 0; n < num_nodes; n++) { 398 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 399 } 400 } 401 } 402 } 403 } 404 } 405 } 406 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 407 408 // Assemble local operator diagonal 409 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 410 411 // Cleanup 412 CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 413 CeedCall(CeedVectorDestroy(&elem_diag)); 414 CeedCall(CeedFree(&identity)); 415 } 416 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 417 CeedCall(CeedVectorDestroy(&assembled_qf)); 418 return CEED_ERROR_SUCCESS; 419 } 420 421 /** 422 @brief Core logic for assembling composite operator diagonal 423 424 @param[in] op CeedOperator to assemble point block diagonal 425 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 426 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 427 @param[out] assembled CeedVector to store assembled diagonal 428 429 @return An error code: 0 - success, otherwise - failure 430 431 @ref Developer 432 **/ 433 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block, 434 CeedVector assembled) { 435 CeedInt num_sub; 436 CeedOperator *suboperators; 437 438 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 439 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 440 for (CeedInt i = 0; i < num_sub; i++) { 441 if (is_point_block) { 442 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 443 } else { 444 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 445 } 446 } 447 return CEED_ERROR_SUCCESS; 448 } 449 450 /** 451 @brief Build nonzero pattern for non-composite operator 452 453 Users should generally use CeedOperatorLinearAssembleSymbolic() 454 455 @param[in] op CeedOperator to assemble nonzero pattern 456 @param[in] offset Offset for number of entries 457 @param[out] rows Row number for each entry 458 @param[out] cols Column number for each entry 459 460 @return An error code: 0 - success, otherwise - failure 461 462 @ref Developer 463 **/ 464 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 465 Ceed ceed; 466 bool is_composite; 467 CeedSize num_nodes_in, num_nodes_out, count = 0; 468 CeedInt num_elem_in, elem_size_in, num_comp_in, layout_er_in[3]; 469 CeedInt num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries; 470 CeedScalar *array; 471 const CeedScalar *elem_dof_a_in, *elem_dof_a_out; 472 CeedVector index_vec_in, index_vec_out, elem_dof_in, elem_dof_out; 473 CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out; 474 475 CeedCall(CeedOperatorGetCeed(op, &ceed)); 476 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 477 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 478 479 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out)); 480 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 481 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 482 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 483 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 484 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in)); 485 486 // Determine elem_dof relation for input 487 CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in)); 488 CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array)); 489 for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i; 490 CeedCall(CeedVectorRestoreArray(index_vec_in, &array)); 491 CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in)); 492 CeedCall(CeedVectorSetValue(elem_dof_in, 0.0)); 493 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in)); 494 CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE)); 495 CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in)); 496 CeedCall(CeedVectorDestroy(&index_vec_in)); 497 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in)); 498 499 if (elem_rstr_in != elem_rstr_out) { 500 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 501 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 502 "Active input and output operator restrictions must have the same number of elements"); 503 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 504 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 505 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out)); 506 507 // Determine elem_dof relation for output 508 CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out)); 509 CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array)); 510 for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i; 511 CeedCall(CeedVectorRestoreArray(index_vec_out, &array)); 512 CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out)); 513 CeedCall(CeedVectorSetValue(elem_dof_out, 0.0)); 514 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out)); 515 CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE)); 516 CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out)); 517 CeedCall(CeedVectorDestroy(&index_vec_out)); 518 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out)); 519 } else { 520 num_elem_out = num_elem_in; 521 elem_size_out = elem_size_in; 522 num_comp_out = num_comp_in; 523 layout_er_out[0] = layout_er_in[0]; 524 layout_er_out[1] = layout_er_in[1]; 525 layout_er_out[2] = layout_er_in[2]; 526 elem_dof_a_out = elem_dof_a_in; 527 } 528 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 529 530 // Determine i, j locations for element matrices 531 for (CeedInt e = 0; e < num_elem_in; e++) { 532 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 533 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 534 for (CeedInt i = 0; i < elem_size_out; i++) { 535 for (CeedInt j = 0; j < elem_size_in; j++) { 536 const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2]; 537 const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2]; 538 const CeedInt row = elem_dof_a_out[elem_dof_index_row]; 539 const CeedInt col = elem_dof_a_in[elem_dof_index_col]; 540 541 rows[offset + count] = row; 542 cols[offset + count] = col; 543 count++; 544 } 545 } 546 } 547 } 548 } 549 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 550 CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in)); 551 CeedCall(CeedVectorDestroy(&elem_dof_in)); 552 if (elem_rstr_in != elem_rstr_out) { 553 CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out)); 554 CeedCall(CeedVectorDestroy(&elem_dof_out)); 555 } 556 return CEED_ERROR_SUCCESS; 557 } 558 559 /** 560 @brief Assemble nonzero entries for non-composite operator 561 562 Users should generally use CeedOperatorLinearAssemble() 563 564 @param[in] op CeedOperator to assemble 565 @param[in] offset Offset for number of entries 566 @param[out] values Values to assemble into matrix 567 568 @return An error code: 0 - success, otherwise - failure 569 570 @ref Developer 571 **/ 572 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 573 Ceed ceed; 574 bool is_composite; 575 576 CeedCall(CeedOperatorGetCeed(op, &ceed)); 577 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 578 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 579 580 // Early exit for empty operator 581 { 582 CeedInt num_elem = 0; 583 584 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 585 if (num_elem == 0) return CEED_ERROR_SUCCESS; 586 } 587 588 if (op->LinearAssembleSingle) { 589 // Backend version 590 CeedCall(op->LinearAssembleSingle(op, offset, values)); 591 return CEED_ERROR_SUCCESS; 592 } else { 593 // Operator fallback 594 CeedOperator op_fallback; 595 596 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 597 if (op_fallback) { 598 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 599 return CEED_ERROR_SUCCESS; 600 } 601 } 602 603 // Assemble QFunction 604 CeedInt layout_qf[3]; 605 const CeedScalar *assembled_qf_array; 606 CeedVector assembled_qf = NULL; 607 CeedElemRestriction assembled_elem_rstr = NULL; 608 609 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE)); 610 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 611 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 612 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 613 614 // Get assembly data 615 CeedInt num_elem_in, elem_size_in, num_comp_in, num_qpts_in; 616 CeedInt num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries; 617 const CeedEvalMode **eval_modes_in, **eval_modes_out; 618 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 619 CeedBasis *active_bases_in, *active_bases_out, basis_in, basis_out; 620 const CeedScalar **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out; 621 CeedElemRestriction elem_rstr_in, elem_rstr_out; 622 CeedRestrictionType elem_rstr_type_in, elem_rstr_type_out; 623 const bool *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL; 624 const CeedInt8 *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL; 625 CeedOperatorAssemblyData data; 626 627 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 628 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out, 629 &num_eval_modes_out, &eval_modes_out, NULL, NULL)); 630 631 CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED, 632 "Cannot assemble operator with multiple active bases"); 633 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); 634 635 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out)); 636 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 637 basis_in = active_bases_in[0]; 638 basis_out = active_bases_out[0]; 639 B_mat_in = B_mats_in[0]; 640 B_mat_out = B_mats_out[0]; 641 642 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 643 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 644 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 645 if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in; 646 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in)); 647 648 CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in)); 649 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 650 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in)); 651 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 652 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in)); 653 } 654 655 if (elem_rstr_in != elem_rstr_out) { 656 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 657 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 658 "Active input and output operator restrictions must have the same number of elements"); 659 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 660 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 661 if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out; 662 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out)); 663 CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED, 664 "Active input and output bases must have the same number of quadrature points"); 665 666 CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out)); 667 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 668 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out)); 669 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 670 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out)); 671 } 672 } else { 673 num_elem_out = num_elem_in; 674 elem_size_out = elem_size_in; 675 num_comp_out = num_comp_in; 676 num_qpts_out = num_qpts_in; 677 678 elem_rstr_orients_out = elem_rstr_orients_in; 679 elem_rstr_curl_orients_out = elem_rstr_curl_orients_in; 680 } 681 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 682 683 // Loop over elements and put in data structure 684 // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 685 CeedSize count = 0; 686 CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL; 687 688 CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat)); 689 CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat)); 690 if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b)); 691 692 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 693 for (CeedSize e = 0; e < num_elem_in; e++) { 694 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 695 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 696 // Compute B^T*D 697 for (CeedSize n = 0; n < elem_size_out; n++) { 698 for (CeedSize q = 0; q < num_qpts_in; q++) { 699 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 700 const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in; 701 CeedScalar sum = 0.0; 702 703 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 704 const CeedSize b_out_index = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n; 705 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out; 706 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 707 708 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 709 } 710 BTD_mat[btd_index] = sum; 711 } 712 } 713 } 714 715 // Form element matrix itself (for each block component) 716 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0])); 717 718 // Transform the element matrix if required 719 if (elem_rstr_orients_out) { 720 const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out]; 721 722 for (CeedInt i = 0; i < elem_size_out; i++) { 723 const double orient = elem_orients[i] ? -1.0 : 1.0; 724 725 for (CeedInt j = 0; j < elem_size_in; j++) { 726 elem_mat[i * elem_size_in + j] *= orient; 727 } 728 } 729 } else if (elem_rstr_curl_orients_out) { 730 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out]; 731 732 // T^T*(B^T*D*B) 733 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 734 for (CeedInt i = 0; i < elem_size_out; i++) { 735 for (CeedInt j = 0; j < elem_size_in; j++) { 736 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] + 737 (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) + 738 (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0); 739 } 740 } 741 } 742 if (elem_rstr_orients_in) { 743 const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in]; 744 745 for (CeedInt i = 0; i < elem_size_out; i++) { 746 for (CeedInt j = 0; j < elem_size_in; j++) { 747 elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0; 748 } 749 } 750 } else if (elem_rstr_curl_orients_in) { 751 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in]; 752 753 // (B^T*D*B)*T 754 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 755 for (CeedInt i = 0; i < elem_size_out; i++) { 756 for (CeedInt j = 0; j < elem_size_in; j++) { 757 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] + 758 (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) + 759 (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0); 760 } 761 } 762 } 763 764 // Put element matrix in coordinate data structure 765 for (CeedInt i = 0; i < elem_size_out; i++) { 766 for (CeedInt j = 0; j < elem_size_in; j++) { 767 vals[offset + count] = elem_mat[i * elem_size_in + j]; 768 count++; 769 } 770 } 771 } 772 } 773 } 774 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 775 CeedCall(CeedVectorRestoreArray(values, &vals)); 776 777 // Cleanup 778 CeedCall(CeedFree(&BTD_mat)); 779 CeedCall(CeedFree(&elem_mat)); 780 CeedCall(CeedFree(&elem_mat_b)); 781 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 782 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in)); 783 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 784 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in)); 785 } 786 if (elem_rstr_in != elem_rstr_out) { 787 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 788 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out)); 789 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 790 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out)); 791 } 792 } 793 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 794 CeedCall(CeedVectorDestroy(&assembled_qf)); 795 return CEED_ERROR_SUCCESS; 796 } 797 798 /** 799 @brief Count number of entries for assembled CeedOperator 800 801 @param[in] op CeedOperator to assemble 802 @param[out] num_entries Number of entries in assembled representation 803 804 @return An error code: 0 - success, otherwise - failure 805 806 @ref Utility 807 **/ 808 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 809 bool is_composite; 810 CeedInt num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out; 811 CeedElemRestriction rstr_in, rstr_out; 812 813 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 814 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 815 816 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out)); 817 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in)); 818 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in)); 819 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in)); 820 if (rstr_in != rstr_out) { 821 CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out)); 822 CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED, 823 "Active input and output operator restrictions must have the same number of elements"); 824 CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out)); 825 CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out)); 826 } else { 827 num_elem_out = num_elem_in; 828 elem_size_out = elem_size_in; 829 num_comp_out = num_comp_in; 830 } 831 *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in; 832 return CEED_ERROR_SUCCESS; 833 } 834 835 /** 836 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 837 838 @param[in] op_fine Fine grid operator 839 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 840 @param[in] rstr_coarse Coarse grid restriction 841 @param[in] basis_coarse Coarse grid active vector basis 842 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 843 @param[out] op_coarse Coarse grid operator 844 @param[out] op_prolong Coarse to fine operator, or NULL 845 @param[out] op_restrict Fine to coarse operator, or NULL 846 847 @return An error code: 0 - success, otherwise - failure 848 849 @ref Developer 850 **/ 851 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 852 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 853 bool is_composite; 854 Ceed ceed; 855 CeedInt num_comp; 856 CeedVector mult_vec = NULL; 857 CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL; 858 859 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 860 861 // Check for composite operator 862 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 863 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 864 865 // Coarse Grid 866 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 867 // -- Clone input fields 868 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 869 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 870 rstr_fine = op_fine->input_fields[i]->elem_rstr; 871 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 872 } else { 873 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 874 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 875 } 876 } 877 // -- Clone output fields 878 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 879 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 880 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 881 } else { 882 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 883 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 884 } 885 } 886 // -- Clone QFunctionAssemblyData 887 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 888 889 // Multiplicity vector 890 if (op_restrict || op_prolong) { 891 CeedVector mult_e_vec; 892 CeedRestrictionType rstr_type; 893 894 CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type)); 895 CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED, 896 "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported"); 897 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 898 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 899 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 900 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 901 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 902 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 903 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 904 CeedCall(CeedVectorDestroy(&mult_e_vec)); 905 CeedCall(CeedVectorReciprocal(mult_vec)); 906 } 907 908 // Clone name 909 bool has_name = op_fine->name; 910 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 911 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 912 913 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 914 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 915 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 916 917 // Restriction/Prolongation Operators 918 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 919 920 // Restriction 921 if (op_restrict) { 922 CeedInt *num_comp_r_data; 923 CeedQFunctionContext ctx_r; 924 CeedQFunction qf_restrict; 925 926 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 927 CeedCall(CeedCalloc(1, &num_comp_r_data)); 928 num_comp_r_data[0] = num_comp; 929 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 930 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 931 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 932 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 933 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 934 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 935 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 936 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 937 938 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 939 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 940 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 941 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 942 943 // Set name 944 char *restriction_name; 945 946 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 947 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 948 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 949 CeedCall(CeedFree(&restriction_name)); 950 951 // Check 952 CeedCall(CeedOperatorCheckReady(*op_restrict)); 953 954 // Cleanup 955 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 956 } 957 958 // Prolongation 959 if (op_prolong) { 960 CeedInt *num_comp_p_data; 961 CeedQFunctionContext ctx_p; 962 CeedQFunction qf_prolong; 963 964 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 965 CeedCall(CeedCalloc(1, &num_comp_p_data)); 966 num_comp_p_data[0] = num_comp; 967 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 968 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 969 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 970 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 971 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 972 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 973 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 974 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 975 976 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 977 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 978 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 979 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 980 981 // Set name 982 char *prolongation_name; 983 984 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 985 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 986 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 987 CeedCall(CeedFree(&prolongation_name)); 988 989 // Check 990 CeedCall(CeedOperatorCheckReady(*op_prolong)); 991 992 // Cleanup 993 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 994 } 995 996 // Check 997 CeedCall(CeedOperatorCheckReady(*op_coarse)); 998 999 // Cleanup 1000 CeedCall(CeedVectorDestroy(&mult_vec)); 1001 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 1002 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 1003 return CEED_ERROR_SUCCESS; 1004 } 1005 1006 /** 1007 @brief Build 1D mass matrix and Laplacian with perturbation 1008 1009 @param[in] interp_1d Interpolation matrix in one dimension 1010 @param[in] grad_1d Gradient matrix in one dimension 1011 @param[in] q_weight_1d Quadrature weights in one dimension 1012 @param[in] P_1d Number of basis nodes in one dimension 1013 @param[in] Q_1d Number of quadrature points in one dimension 1014 @param[in] dim Dimension of basis 1015 @param[out] mass Assembled mass matrix in one dimension 1016 @param[out] laplace Assembled perturbed Laplacian in one dimension 1017 1018 @return An error code: 0 - success, otherwise - failure 1019 1020 @ref Developer 1021 **/ 1022 CeedPragmaOptimizeOff 1023 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 1024 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 1025 for (CeedInt i = 0; i < P_1d; i++) { 1026 for (CeedInt j = 0; j < P_1d; j++) { 1027 CeedScalar sum = 0.0; 1028 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 1029 mass[i + j * P_1d] = sum; 1030 } 1031 } 1032 // -- Laplacian 1033 for (CeedInt i = 0; i < P_1d; i++) { 1034 for (CeedInt j = 0; j < P_1d; j++) { 1035 CeedScalar sum = 0.0; 1036 1037 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 1038 laplace[i + j * P_1d] = sum; 1039 } 1040 } 1041 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 1042 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 1043 return CEED_ERROR_SUCCESS; 1044 } 1045 CeedPragmaOptimizeOn 1046 1047 /// @} 1048 1049 /// ---------------------------------------------------------------------------- 1050 /// CeedOperator Backend API 1051 /// ---------------------------------------------------------------------------- 1052 /// @addtogroup CeedOperatorBackend 1053 /// @{ 1054 1055 /** 1056 @brief Create point block restriction for active operator field 1057 1058 @param[in] rstr Original CeedElemRestriction for active field 1059 @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored 1060 1061 @return An error code: 0 - success, otherwise - failure 1062 1063 @ref Backend 1064 **/ 1065 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) { 1066 Ceed ceed; 1067 CeedInt num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets; 1068 CeedSize l_size; 1069 const CeedInt *offsets; 1070 1071 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 1072 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 1073 1074 // Expand offsets 1075 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 1076 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 1077 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 1078 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 1079 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 1080 shift = num_comp; 1081 if (comp_stride != 1) shift *= num_comp; 1082 CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets)); 1083 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 1084 point_block_offsets[i] = offsets[i] * shift; 1085 } 1086 1087 // Create new restriction 1088 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 1089 point_block_offsets, point_block_rstr)); 1090 1091 // Cleanup 1092 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 1093 return CEED_ERROR_SUCCESS; 1094 } 1095 1096 /** 1097 @brief Create object holding CeedQFunction assembly data for CeedOperator 1098 1099 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1100 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 1101 1102 @return An error code: 0 - success, otherwise - failure 1103 1104 @ref Backend 1105 **/ 1106 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 1107 CeedCall(CeedCalloc(1, data)); 1108 (*data)->ref_count = 1; 1109 (*data)->ceed = ceed; 1110 CeedCall(CeedReference(ceed)); 1111 return CEED_ERROR_SUCCESS; 1112 } 1113 1114 /** 1115 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1116 1117 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 1118 1119 @return An error code: 0 - success, otherwise - failure 1120 1121 @ref Backend 1122 **/ 1123 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1124 data->ref_count++; 1125 return CEED_ERROR_SUCCESS; 1126 } 1127 1128 /** 1129 @brief Set re-use of CeedQFunctionAssemblyData 1130 1131 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 1132 @param[in] reuse_data Boolean flag indicating data re-use 1133 1134 @return An error code: 0 - success, otherwise - failure 1135 1136 @ref Backend 1137 **/ 1138 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 1139 data->reuse_data = reuse_data; 1140 data->needs_data_update = true; 1141 return CEED_ERROR_SUCCESS; 1142 } 1143 1144 /** 1145 @brief Mark QFunctionAssemblyData as stale 1146 1147 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 1148 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 1149 1150 @return An error code: 0 - success, otherwise - failure 1151 1152 @ref Backend 1153 **/ 1154 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 1155 data->needs_data_update = needs_data_update; 1156 return CEED_ERROR_SUCCESS; 1157 } 1158 1159 /** 1160 @brief Determine if QFunctionAssemblyData needs update 1161 1162 @param[in] data CeedQFunctionAssemblyData to mark as stale 1163 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1164 1165 @return An error code: 0 - success, otherwise - failure 1166 1167 @ref Backend 1168 **/ 1169 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1170 *is_update_needed = !data->reuse_data || data->needs_data_update; 1171 return CEED_ERROR_SUCCESS; 1172 } 1173 1174 /** 1175 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1176 1177 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1178 1179 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1180 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1181 CeedQFunctionAssemblyData. 1182 1183 @param[in] data CeedQFunctionAssemblyData to copy reference to 1184 @param[in,out] data_copy Variable to store copied reference 1185 1186 @return An error code: 0 - success, otherwise - failure 1187 1188 @ref Backend 1189 **/ 1190 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1191 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1192 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1193 *data_copy = data; 1194 return CEED_ERROR_SUCCESS; 1195 } 1196 1197 /** 1198 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1199 1200 @param[in] data CeedQFunctionAssemblyData to retrieve status 1201 @param[out] is_setup Boolean flag for setup status 1202 1203 @return An error code: 0 - success, otherwise - failure 1204 1205 @ref Backend 1206 **/ 1207 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1208 *is_setup = data->is_setup; 1209 return CEED_ERROR_SUCCESS; 1210 } 1211 1212 /** 1213 @brief Set internal objects for CeedQFunctionAssemblyData 1214 1215 @param[in,out] data CeedQFunctionAssemblyData to set objects 1216 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1217 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1218 1219 @return An error code: 0 - success, otherwise - failure 1220 1221 @ref Backend 1222 **/ 1223 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1224 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1225 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1226 1227 data->is_setup = true; 1228 return CEED_ERROR_SUCCESS; 1229 } 1230 1231 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1232 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1233 1234 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1235 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1236 return CEED_ERROR_SUCCESS; 1237 } 1238 1239 /** 1240 @brief Destroy CeedQFunctionAssemblyData 1241 1242 @param[in,out] data CeedQFunctionAssemblyData to destroy 1243 1244 @return An error code: 0 - success, otherwise - failure 1245 1246 @ref Backend 1247 **/ 1248 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1249 if (!*data || --(*data)->ref_count > 0) { 1250 *data = NULL; 1251 return CEED_ERROR_SUCCESS; 1252 } 1253 CeedCall(CeedDestroy(&(*data)->ceed)); 1254 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1255 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1256 1257 CeedCall(CeedFree(data)); 1258 return CEED_ERROR_SUCCESS; 1259 } 1260 1261 /** 1262 @brief Get CeedOperatorAssemblyData 1263 1264 @param[in] op CeedOperator to assemble 1265 @param[out] data CeedQFunctionAssemblyData 1266 1267 @return An error code: 0 - success, otherwise - failure 1268 1269 @ref Backend 1270 **/ 1271 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1272 if (!op->op_assembled) { 1273 CeedOperatorAssemblyData data; 1274 1275 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1276 op->op_assembled = data; 1277 } 1278 *data = op->op_assembled; 1279 return CEED_ERROR_SUCCESS; 1280 } 1281 1282 /** 1283 @brief Create object holding CeedOperator assembly data. 1284 1285 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1286 An array with references to the corresponding active CeedElemRestrictions is also stored. 1287 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1288 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1289 CeedEvalMode. 1290 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1291 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1292 1293 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1294 @param[in] op CeedOperator to be assembled 1295 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1296 1297 @return An error code: 0 - success, otherwise - failure 1298 1299 @ref Backend 1300 **/ 1301 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1302 CeedInt num_active_bases_in = 0, num_active_bases_out = 0, offset = 0; 1303 CeedInt num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL; 1304 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1305 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1306 CeedQFunctionField *qf_fields; 1307 CeedQFunction qf; 1308 CeedOperatorField *op_fields; 1309 bool is_composite; 1310 1311 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1312 CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators."); 1313 1314 // Allocate 1315 CeedCall(CeedCalloc(1, data)); 1316 (*data)->ceed = ceed; 1317 CeedCall(CeedReference(ceed)); 1318 1319 // Build OperatorAssembly data 1320 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1321 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1322 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1323 1324 // Determine active input basis 1325 for (CeedInt i = 0; i < num_input_fields; i++) { 1326 CeedVector vec; 1327 1328 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1329 if (vec == CEED_VECTOR_ACTIVE) { 1330 CeedInt index = -1, num_comp, q_comp; 1331 CeedEvalMode eval_mode; 1332 CeedBasis basis_in = NULL; 1333 1334 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1335 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1336 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1337 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1338 for (CeedInt i = 0; i < num_active_bases_in; i++) { 1339 if ((*data)->active_bases_in[i] == basis_in) index = i; 1340 } 1341 if (index == -1) { 1342 CeedElemRestriction elem_rstr_in; 1343 1344 index = num_active_bases_in; 1345 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in)); 1346 (*data)->active_bases_in[num_active_bases_in] = NULL; 1347 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in])); 1348 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in)); 1349 (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL; 1350 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1351 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in])); 1352 CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in)); 1353 num_eval_modes_in[index] = 0; 1354 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in)); 1355 eval_modes_in[index] = NULL; 1356 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in)); 1357 eval_mode_offsets_in[index] = NULL; 1358 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in)); 1359 (*data)->assembled_bases_in[index] = NULL; 1360 num_active_bases_in++; 1361 } 1362 if (eval_mode != CEED_EVAL_WEIGHT) { 1363 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1364 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1365 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1366 for (CeedInt d = 0; d < q_comp; d++) { 1367 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1368 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1369 offset += num_comp; 1370 } 1371 num_eval_modes_in[index] += q_comp; 1372 } 1373 } 1374 } 1375 1376 // Determine active output basis 1377 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1378 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1379 offset = 0; 1380 for (CeedInt i = 0; i < num_output_fields; i++) { 1381 CeedVector vec; 1382 1383 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1384 if (vec == CEED_VECTOR_ACTIVE) { 1385 CeedInt index = -1, num_comp, q_comp; 1386 CeedEvalMode eval_mode; 1387 CeedBasis basis_out = NULL; 1388 1389 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1390 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1391 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1392 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1393 for (CeedInt i = 0; i < num_active_bases_out; i++) { 1394 if ((*data)->active_bases_out[i] == basis_out) index = i; 1395 } 1396 if (index == -1) { 1397 CeedElemRestriction elem_rstr_out; 1398 1399 index = num_active_bases_out; 1400 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out)); 1401 (*data)->active_bases_out[num_active_bases_out] = NULL; 1402 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out])); 1403 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out)); 1404 (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL; 1405 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1406 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out])); 1407 CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out)); 1408 num_eval_modes_out[index] = 0; 1409 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out)); 1410 eval_modes_out[index] = NULL; 1411 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out)); 1412 eval_mode_offsets_out[index] = NULL; 1413 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out)); 1414 (*data)->assembled_bases_out[index] = NULL; 1415 num_active_bases_out++; 1416 } 1417 if (eval_mode != CEED_EVAL_WEIGHT) { 1418 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1419 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1420 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1421 for (CeedInt d = 0; d < q_comp; d++) { 1422 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1423 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1424 offset += num_comp; 1425 } 1426 num_eval_modes_out[index] += q_comp; 1427 } 1428 } 1429 } 1430 (*data)->num_active_bases_in = num_active_bases_in; 1431 (*data)->num_eval_modes_in = num_eval_modes_in; 1432 (*data)->eval_modes_in = eval_modes_in; 1433 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1434 (*data)->num_active_bases_out = num_active_bases_out; 1435 (*data)->num_eval_modes_out = num_eval_modes_out; 1436 (*data)->eval_modes_out = eval_modes_out; 1437 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1438 (*data)->num_output_components = offset; 1439 return CEED_ERROR_SUCCESS; 1440 } 1441 1442 /** 1443 @brief Get CeedOperator CeedEvalModes for assembly. 1444 1445 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1446 1447 @param[in] data CeedOperatorAssemblyData 1448 @param[out] num_active_bases_in Total number of active bases for input 1449 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1450 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1451 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1452 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1453 @param[out] num_active_bases_out Total number of active bases for output 1454 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1455 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1456 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1457 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1458 including contributions of all active bases 1459 1460 @return An error code: 0 - success, otherwise - failure 1461 1462 @ref Backend 1463 **/ 1464 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in, 1465 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out, 1466 CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, 1467 CeedSize *num_output_components) { 1468 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1469 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1470 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1471 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1472 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1473 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1474 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1475 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1476 if (num_output_components) *num_output_components = data->num_output_components; 1477 return CEED_ERROR_SUCCESS; 1478 } 1479 1480 /** 1481 @brief Get CeedOperator CeedBasis data for assembly. 1482 1483 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1484 1485 @param[in] data CeedOperatorAssemblyData 1486 @param[out] num_active_bases_in Number of active input bases, or NULL 1487 @param[out] active_bases_in Pointer to hold active input CeedBasis, or NULL 1488 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1489 @param[out] num_active_bases_out Number of active output bases, or NULL 1490 @param[out] active_bases_out Pointer to hold active output CeedBasis, or NULL 1491 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1492 1493 @return An error code: 0 - success, otherwise - failure 1494 1495 @ref Backend 1496 **/ 1497 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in, 1498 const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out, 1499 const CeedScalar ***assembled_bases_out) { 1500 // Assemble B_in, B_out if needed 1501 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1502 CeedInt num_qpts; 1503 1504 if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts)); 1505 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts)); 1506 for (CeedInt b = 0; b < data->num_active_bases_in; b++) { 1507 bool has_eval_none = false; 1508 CeedInt num_nodes; 1509 CeedScalar *B_in = NULL, *identity = NULL; 1510 1511 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes)); 1512 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1513 1514 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1515 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1516 } 1517 if (has_eval_none) { 1518 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1519 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1520 identity[i * num_nodes + i] = 1.0; 1521 } 1522 } 1523 1524 for (CeedInt q = 0; q < num_qpts; q++) { 1525 for (CeedInt n = 0; n < num_nodes; n++) { 1526 CeedInt d_in = 0, q_comp_in; 1527 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1528 1529 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1530 const CeedInt qq = data->num_eval_modes_in[b] * q; 1531 const CeedScalar *B = NULL; 1532 1533 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B)); 1534 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1535 if (q_comp_in > 1) { 1536 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1537 else B = &B[(++d_in) * num_qpts * num_nodes]; 1538 } 1539 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1540 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1541 } 1542 } 1543 } 1544 if (identity) CeedCall(CeedFree(&identity)); 1545 data->assembled_bases_in[b] = B_in; 1546 } 1547 } 1548 1549 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1550 CeedInt num_qpts; 1551 1552 if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts)); 1553 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts)); 1554 for (CeedInt b = 0; b < data->num_active_bases_out; b++) { 1555 bool has_eval_none = false; 1556 CeedInt num_nodes; 1557 CeedScalar *B_out = NULL, *identity = NULL; 1558 1559 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes)); 1560 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1561 1562 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1563 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1564 } 1565 if (has_eval_none) { 1566 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1567 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1568 identity[i * num_nodes + i] = 1.0; 1569 } 1570 } 1571 1572 for (CeedInt q = 0; q < num_qpts; q++) { 1573 for (CeedInt n = 0; n < num_nodes; n++) { 1574 CeedInt d_out = 0, q_comp_out; 1575 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1576 1577 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1578 const CeedInt qq = data->num_eval_modes_out[b] * q; 1579 const CeedScalar *B = NULL; 1580 1581 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B)); 1582 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1583 if (q_comp_out > 1) { 1584 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1585 else B = &B[(++d_out) * num_qpts * num_nodes]; 1586 } 1587 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1588 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1589 } 1590 } 1591 } 1592 if (identity) CeedCall(CeedFree(&identity)); 1593 data->assembled_bases_out[b] = B_out; 1594 } 1595 } 1596 1597 // Pass out assembled data 1598 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1599 if (active_bases_in) *active_bases_in = data->active_bases_in; 1600 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1601 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1602 if (active_bases_out) *active_bases_out = data->active_bases_out; 1603 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1604 return CEED_ERROR_SUCCESS; 1605 } 1606 1607 /** 1608 @brief Get CeedOperator CeedBasis data for assembly. 1609 1610 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1611 1612 @param[in] data CeedOperatorAssemblyData 1613 @param[out] num_active_elem_rstrs_in Number of active input element restrictions, or NULL 1614 @param[out] active_elem_rstrs_in Pointer to hold active input CeedElemRestrictions, or NULL 1615 @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL 1616 @param[out] active_elem_rstrs_out Pointer to hold active output CeedElemRestrictions, or NULL 1617 1618 @return An error code: 0 - success, otherwise - failure 1619 1620 @ref Backend 1621 **/ 1622 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in, 1623 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out, 1624 CeedElemRestriction **active_elem_rstrs_out) { 1625 if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in; 1626 if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in; 1627 if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out; 1628 if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out; 1629 return CEED_ERROR_SUCCESS; 1630 } 1631 1632 /** 1633 @brief Destroy CeedOperatorAssemblyData 1634 1635 @param[in,out] data CeedOperatorAssemblyData to destroy 1636 1637 @return An error code: 0 - success, otherwise - failure 1638 1639 @ref Backend 1640 **/ 1641 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1642 if (!*data) { 1643 *data = NULL; 1644 return CEED_ERROR_SUCCESS; 1645 } 1646 CeedCall(CeedDestroy(&(*data)->ceed)); 1647 for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) { 1648 CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b])); 1649 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b])); 1650 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1651 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1652 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1653 } 1654 for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) { 1655 CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b])); 1656 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b])); 1657 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1658 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1659 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1660 } 1661 CeedCall(CeedFree(&(*data)->active_bases_in)); 1662 CeedCall(CeedFree(&(*data)->active_bases_out)); 1663 CeedCall(CeedFree(&(*data)->active_elem_rstrs_in)); 1664 CeedCall(CeedFree(&(*data)->active_elem_rstrs_out)); 1665 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1666 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1667 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1668 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1669 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1670 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1671 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1672 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1673 1674 CeedCall(CeedFree(data)); 1675 return CEED_ERROR_SUCCESS; 1676 } 1677 1678 /// @} 1679 1680 /// ---------------------------------------------------------------------------- 1681 /// CeedOperator Public API 1682 /// ---------------------------------------------------------------------------- 1683 /// @addtogroup CeedOperatorUser 1684 /// @{ 1685 1686 /** 1687 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1688 1689 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1690 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1691 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1692 1693 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1694 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1695 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1696 1697 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1698 1699 @param[in] op CeedOperator to assemble CeedQFunction 1700 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1701 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1702 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1703 1704 @return An error code: 0 - success, otherwise - failure 1705 1706 @ref User 1707 **/ 1708 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1709 CeedCall(CeedOperatorCheckReady(op)); 1710 1711 if (op->LinearAssembleQFunction) { 1712 // Backend version 1713 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1714 } else { 1715 // Operator fallback 1716 CeedOperator op_fallback; 1717 1718 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1719 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1720 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1721 } 1722 return CEED_ERROR_SUCCESS; 1723 } 1724 1725 /** 1726 @brief Assemble CeedQFunction and store result internally. 1727 1728 Return copied references of stored data to the caller. 1729 Caller is responsible for ownership and destruction of the copied references. 1730 See also @ref CeedOperatorLinearAssembleQFunction 1731 1732 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1733 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1734 1735 @param[in] op CeedOperator to assemble CeedQFunction 1736 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1737 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1738 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1739 1740 @return An error code: 0 - success, otherwise - failure 1741 1742 @ref User 1743 **/ 1744 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1745 int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL; 1746 CeedOperator op_assemble = NULL; 1747 CeedOperator op_fallback_parent = NULL; 1748 1749 CeedCall(CeedOperatorCheckReady(op)); 1750 1751 // Determine if fallback parent or operator has implementation 1752 CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent)); 1753 if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) { 1754 // -- Backend version for op fallback parent is faster, if it exists 1755 LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate; 1756 op_assemble = op_fallback_parent; 1757 } else if (op->LinearAssembleQFunctionUpdate) { 1758 // -- Backend version for op 1759 LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate; 1760 op_assemble = op; 1761 } 1762 1763 // Assemble QFunction 1764 if (LinearAssembleQFunctionUpdate) { 1765 // Backend or fallback parent version 1766 bool qf_assembled_is_setup; 1767 CeedVector assembled_vec = NULL; 1768 CeedElemRestriction assembled_rstr = NULL; 1769 1770 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1771 if (qf_assembled_is_setup) { 1772 bool update_needed; 1773 1774 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1775 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1776 if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request)); 1777 } else { 1778 CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request)); 1779 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1780 } 1781 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1782 1783 // Copy reference from internally held copy 1784 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1785 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1786 CeedCall(CeedVectorDestroy(&assembled_vec)); 1787 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1788 } else { 1789 // Operator fallback 1790 CeedOperator op_fallback; 1791 1792 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1793 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1794 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1795 } 1796 return CEED_ERROR_SUCCESS; 1797 } 1798 1799 /** 1800 @brief Assemble the diagonal of a square linear CeedOperator 1801 1802 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1803 1804 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1805 1806 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1807 1808 @param[in] op CeedOperator to assemble CeedQFunction 1809 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1810 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1811 1812 @return An error code: 0 - success, otherwise - failure 1813 1814 @ref User 1815 **/ 1816 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1817 bool is_composite; 1818 CeedSize input_size = 0, output_size = 0; 1819 1820 CeedCall(CeedOperatorCheckReady(op)); 1821 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1822 1823 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1824 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1825 1826 // Early exit for empty operator 1827 if (!is_composite) { 1828 CeedInt num_elem = 0; 1829 1830 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1831 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1832 } 1833 1834 if (op->LinearAssembleDiagonal) { 1835 // Backend version 1836 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1837 return CEED_ERROR_SUCCESS; 1838 } else if (op->LinearAssembleAddDiagonal) { 1839 // Backend version with zeroing first 1840 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1841 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1842 return CEED_ERROR_SUCCESS; 1843 } else { 1844 // Operator fallback 1845 CeedOperator op_fallback; 1846 1847 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1848 if (op_fallback) { 1849 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1850 return CEED_ERROR_SUCCESS; 1851 } 1852 } 1853 // Default interface implementation 1854 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1855 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1856 return CEED_ERROR_SUCCESS; 1857 } 1858 1859 /** 1860 @brief Assemble the diagonal of a square linear CeedOperator 1861 1862 This sums into a CeedVector the diagonal of a linear CeedOperator. 1863 1864 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1865 1866 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1867 1868 @param[in] op CeedOperator to assemble CeedQFunction 1869 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1870 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1871 1872 @return An error code: 0 - success, otherwise - failure 1873 1874 @ref User 1875 **/ 1876 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1877 bool is_composite; 1878 CeedSize input_size = 0, output_size = 0; 1879 1880 CeedCall(CeedOperatorCheckReady(op)); 1881 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1882 1883 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1884 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1885 1886 // Early exit for empty operator 1887 if (!is_composite) { 1888 CeedInt num_elem = 0; 1889 1890 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1891 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1892 } 1893 1894 if (op->LinearAssembleAddDiagonal) { 1895 // Backend version 1896 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1897 return CEED_ERROR_SUCCESS; 1898 } else { 1899 // Operator fallback 1900 CeedOperator op_fallback; 1901 1902 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1903 if (op_fallback) { 1904 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1905 return CEED_ERROR_SUCCESS; 1906 } 1907 } 1908 // Default interface implementation 1909 if (is_composite) { 1910 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1911 } else { 1912 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1913 } 1914 return CEED_ERROR_SUCCESS; 1915 } 1916 1917 /** 1918 @brief Fully assemble the point-block diagonal pattern of a linear operator. 1919 1920 Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal(). 1921 1922 The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the 1923 matrix in entry (i, j). 1924 Note that the (i, j) pairs are unique. 1925 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in 1926 the same ordering. 1927 1928 This will generally be slow unless your operator is low-order. 1929 1930 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1931 1932 @param[in] op CeedOperator to assemble 1933 @param[out] num_entries Number of entries in coordinate nonzero pattern 1934 @param[out] rows Row number for each entry 1935 @param[out] cols Column number for each entry 1936 1937 @ref User 1938 **/ 1939 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1940 Ceed ceed; 1941 bool is_composite; 1942 CeedInt num_active_components, num_sub_operators; 1943 CeedOperator *sub_operators; 1944 1945 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1946 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1947 1948 CeedSize input_size = 0, output_size = 0; 1949 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1950 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1951 1952 if (is_composite) { 1953 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators)); 1954 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1955 } else { 1956 sub_operators = &op; 1957 num_sub_operators = 1; 1958 } 1959 1960 // Verify operator can be assembled correctly 1961 { 1962 CeedOperatorAssemblyData data; 1963 CeedInt num_active_elem_rstrs, comp_stride; 1964 CeedElemRestriction *active_elem_rstrs; 1965 1966 // Get initial values to check against 1967 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data)); 1968 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 1969 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride)); 1970 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components)); 1971 1972 // Verify that all active element restrictions have same component stride and number of components 1973 for (CeedInt k = 0; k < num_sub_operators; k++) { 1974 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data)); 1975 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 1976 for (CeedInt i = 0; i < num_active_elem_rstrs; i++) { 1977 CeedInt comp_stride_sub, num_active_components_sub; 1978 1979 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub)); 1980 CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION, 1981 "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub); 1982 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub)); 1983 CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE, 1984 "All suboperators must have the same number of output components"); 1985 } 1986 } 1987 } 1988 *num_entries = input_size * num_active_components; 1989 CeedCall(CeedCalloc(*num_entries, rows)); 1990 CeedCall(CeedCalloc(*num_entries, cols)); 1991 1992 for (CeedInt o = 0; o < num_sub_operators; o++) { 1993 CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr; 1994 CeedInt comp_stride, num_elem, elem_size; 1995 const CeedInt *offsets, *point_block_offsets; 1996 1997 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr)); 1998 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride)); 1999 CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem)); 2000 CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size)); 2001 CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets)); 2002 2003 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr)); 2004 CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets)); 2005 2006 for (CeedSize i = 0; i < num_elem * elem_size; i++) { 2007 for (CeedInt c_out = 0; c_out < num_active_components; c_out++) { 2008 for (CeedInt c_in = 0; c_in < num_active_components; c_in++) { 2009 (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride; 2010 (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride; 2011 } 2012 } 2013 } 2014 2015 CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets)); 2016 CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets)); 2017 CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr)); 2018 } 2019 return CEED_ERROR_SUCCESS; 2020 } 2021 2022 /** 2023 @brief Assemble the point block diagonal of a square linear CeedOperator 2024 2025 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 2026 2027 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2028 2029 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2030 2031 @param[in] op CeedOperator to assemble CeedQFunction 2032 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2033 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2034 component in]. 2035 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2036 2037 @return An error code: 0 - success, otherwise - failure 2038 2039 @ref User 2040 **/ 2041 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2042 bool is_composite; 2043 CeedSize input_size = 0, output_size = 0; 2044 2045 CeedCall(CeedOperatorCheckReady(op)); 2046 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2047 2048 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2049 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2050 2051 // Early exit for empty operator 2052 if (!is_composite) { 2053 CeedInt num_elem = 0; 2054 2055 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2056 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2057 } 2058 2059 if (op->LinearAssemblePointBlockDiagonal) { 2060 // Backend version 2061 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 2062 return CEED_ERROR_SUCCESS; 2063 } else if (op->LinearAssembleAddPointBlockDiagonal) { 2064 // Backend version with zeroing first 2065 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2066 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2067 return CEED_ERROR_SUCCESS; 2068 } else { 2069 // Operator fallback 2070 CeedOperator op_fallback; 2071 2072 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2073 if (op_fallback) { 2074 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 2075 return CEED_ERROR_SUCCESS; 2076 } 2077 } 2078 // Default interface implementation 2079 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2080 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2081 return CEED_ERROR_SUCCESS; 2082 } 2083 2084 /** 2085 @brief Assemble the point block diagonal of a square linear CeedOperator 2086 2087 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 2088 2089 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2090 2091 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2092 2093 @param[in] op CeedOperator to assemble CeedQFunction 2094 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2095 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2096 component in]. 2097 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2098 2099 @return An error code: 0 - success, otherwise - failure 2100 2101 @ref User 2102 **/ 2103 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2104 bool is_composite; 2105 CeedSize input_size = 0, output_size = 0; 2106 2107 CeedCall(CeedOperatorCheckReady(op)); 2108 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2109 2110 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2111 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2112 2113 // Early exit for empty operator 2114 if (!is_composite) { 2115 CeedInt num_elem = 0; 2116 2117 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2118 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2119 } 2120 2121 if (op->LinearAssembleAddPointBlockDiagonal) { 2122 // Backend version 2123 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2124 return CEED_ERROR_SUCCESS; 2125 } else { 2126 // Operator fallback 2127 CeedOperator op_fallback; 2128 2129 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2130 if (op_fallback) { 2131 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 2132 return CEED_ERROR_SUCCESS; 2133 } 2134 } 2135 // Default interface implementation 2136 if (is_composite) { 2137 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 2138 } else { 2139 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 2140 } 2141 return CEED_ERROR_SUCCESS; 2142 } 2143 2144 /** 2145 @brief Fully assemble the nonzero pattern of a linear operator. 2146 2147 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 2148 2149 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2150 matrix in entry (i, j). 2151 Note that the (i, j) pairs are not unique and may repeat. 2152 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 2153 2154 This will generally be slow unless your operator is low-order. 2155 2156 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2157 2158 @param[in] op CeedOperator to assemble 2159 @param[out] num_entries Number of entries in coordinate nonzero pattern 2160 @param[out] rows Row number for each entry 2161 @param[out] cols Column number for each entry 2162 2163 @ref User 2164 **/ 2165 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 2166 bool is_composite; 2167 CeedInt num_suboperators, offset = 0; 2168 CeedSize single_entries; 2169 CeedOperator *sub_operators; 2170 2171 CeedCall(CeedOperatorCheckReady(op)); 2172 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2173 2174 if (op->LinearAssembleSymbolic) { 2175 // Backend version 2176 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 2177 return CEED_ERROR_SUCCESS; 2178 } else { 2179 // Operator fallback 2180 CeedOperator op_fallback; 2181 2182 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2183 if (op_fallback) { 2184 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 2185 return CEED_ERROR_SUCCESS; 2186 } 2187 } 2188 2189 // Default interface implementation 2190 2191 // Count entries and allocate rows, cols arrays 2192 *num_entries = 0; 2193 if (is_composite) { 2194 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2195 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2196 for (CeedInt k = 0; k < num_suboperators; ++k) { 2197 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2198 *num_entries += single_entries; 2199 } 2200 } else { 2201 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 2202 *num_entries += single_entries; 2203 } 2204 CeedCall(CeedCalloc(*num_entries, rows)); 2205 CeedCall(CeedCalloc(*num_entries, cols)); 2206 2207 // Assemble nonzero locations 2208 if (is_composite) { 2209 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2210 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2211 for (CeedInt k = 0; k < num_suboperators; ++k) { 2212 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 2213 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2214 offset += single_entries; 2215 } 2216 } else { 2217 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 2218 } 2219 return CEED_ERROR_SUCCESS; 2220 } 2221 2222 /** 2223 @brief Fully assemble the nonzero entries of a linear operator. 2224 2225 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 2226 2227 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2228 matrix in entry (i, j). 2229 Note that the (i, j) pairs are not unique and may repeat. 2230 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 2231 2232 This will generally be slow unless your operator is low-order. 2233 2234 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2235 2236 @param[in] op CeedOperator to assemble 2237 @param[out] values Values to assemble into matrix 2238 2239 @ref User 2240 **/ 2241 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 2242 bool is_composite; 2243 CeedInt num_suboperators, offset = 0; 2244 CeedSize single_entries = 0; 2245 CeedOperator *sub_operators; 2246 2247 CeedCall(CeedOperatorCheckReady(op)); 2248 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2249 2250 // Early exit for empty operator 2251 if (!is_composite) { 2252 CeedInt num_elem = 0; 2253 2254 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2255 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2256 } 2257 2258 if (op->LinearAssemble) { 2259 // Backend version 2260 CeedCall(op->LinearAssemble(op, values)); 2261 return CEED_ERROR_SUCCESS; 2262 } else { 2263 // Operator fallback 2264 CeedOperator op_fallback; 2265 2266 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2267 if (op_fallback) { 2268 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2269 return CEED_ERROR_SUCCESS; 2270 } 2271 } 2272 2273 // Default interface implementation 2274 CeedCall(CeedVectorSetValue(values, 0.0)); 2275 if (is_composite) { 2276 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2277 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2278 for (CeedInt k = 0; k < num_suboperators; k++) { 2279 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2280 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2281 offset += single_entries; 2282 } 2283 } else { 2284 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2285 } 2286 return CEED_ERROR_SUCCESS; 2287 } 2288 2289 /** 2290 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2291 2292 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2293 2294 @param[in] op Composite CeedOperator 2295 @param[in] num_skip_indices Number of suboperators to skip 2296 @param[in] skip_indices Array of indices of suboperators to skip 2297 @param[out] mult Vector to store multiplicity (of size l_size) 2298 2299 @return An error code: 0 - success, otherwise - failure 2300 2301 @ref User 2302 **/ 2303 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2304 Ceed ceed; 2305 CeedInt num_suboperators; 2306 CeedSize l_vec_len; 2307 CeedScalar *mult_array; 2308 CeedVector ones_l_vec; 2309 CeedElemRestriction elem_rstr, mult_elem_rstr; 2310 CeedOperator *sub_operators; 2311 2312 CeedCall(CeedOperatorCheckReady(op)); 2313 2314 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2315 2316 // Zero mult vector 2317 CeedCall(CeedVectorSetValue(mult, 0.0)); 2318 2319 // Get suboperators 2320 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2321 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2322 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2323 2324 // Work vector 2325 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2326 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2327 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2328 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2329 2330 // Compute multiplicity across suboperators 2331 for (CeedInt i = 0; i < num_suboperators; i++) { 2332 const CeedScalar *sub_mult_array; 2333 CeedVector sub_mult_l_vec, ones_e_vec; 2334 2335 // -- Check for suboperator to skip 2336 for (CeedInt j = 0; j < num_skip_indices; j++) { 2337 if (skip_indices[j] == i) continue; 2338 } 2339 2340 // -- Sub operator multiplicity 2341 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2342 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); 2343 CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2344 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2345 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2346 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2347 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2348 // ---- Flag every node present in the current suboperator 2349 for (CeedInt j = 0; j < l_vec_len; j++) { 2350 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2351 } 2352 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2353 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2354 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2355 CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr)); 2356 } 2357 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2358 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2359 return CEED_ERROR_SUCCESS; 2360 } 2361 2362 /** 2363 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2364 grid interpolation 2365 2366 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2367 2368 @param[in] op_fine Fine grid operator 2369 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2370 @param[in] rstr_coarse Coarse grid restriction 2371 @param[in] basis_coarse Coarse grid active vector basis 2372 @param[out] op_coarse Coarse grid operator 2373 @param[out] op_prolong Coarse to fine operator, or NULL 2374 @param[out] op_restrict Fine to coarse operator, or NULL 2375 2376 @return An error code: 0 - success, otherwise - failure 2377 2378 @ref User 2379 **/ 2380 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2381 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2382 CeedBasis basis_c_to_f = NULL; 2383 2384 CeedCall(CeedOperatorCheckReady(op_fine)); 2385 2386 // Build prolongation matrix, if required 2387 if (op_prolong || op_restrict) { 2388 CeedBasis basis_fine; 2389 2390 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2391 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2392 } 2393 2394 // Core code 2395 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2396 return CEED_ERROR_SUCCESS; 2397 } 2398 2399 /** 2400 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2401 2402 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2403 2404 @param[in] op_fine Fine grid operator 2405 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2406 @param[in] rstr_coarse Coarse grid restriction 2407 @param[in] basis_coarse Coarse grid active vector basis 2408 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2409 @param[out] op_coarse Coarse grid operator 2410 @param[out] op_prolong Coarse to fine operator, or NULL 2411 @param[out] op_restrict Fine to coarse operator, or NULL 2412 2413 @return An error code: 0 - success, otherwise - failure 2414 2415 @ref User 2416 **/ 2417 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2418 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2419 CeedOperator *op_restrict) { 2420 Ceed ceed; 2421 CeedInt Q_f, Q_c; 2422 CeedBasis basis_fine, basis_c_to_f = NULL; 2423 2424 CeedCall(CeedOperatorCheckReady(op_fine)); 2425 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2426 2427 // Check for compatible quadrature spaces 2428 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2429 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2430 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2431 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2432 2433 // Create coarse to fine basis, if required 2434 if (op_prolong || op_restrict) { 2435 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2436 CeedScalar *q_ref, *q_weight, *grad; 2437 2438 // Check if interpolation matrix is provided 2439 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2440 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2441 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2442 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2443 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2444 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2445 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2446 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2447 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2448 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2449 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2450 CeedCall(CeedFree(&q_ref)); 2451 CeedCall(CeedFree(&q_weight)); 2452 CeedCall(CeedFree(&grad)); 2453 } 2454 2455 // Core code 2456 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2457 return CEED_ERROR_SUCCESS; 2458 } 2459 2460 /** 2461 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2462 2463 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2464 2465 @param[in] op_fine Fine grid operator 2466 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2467 @param[in] rstr_coarse Coarse grid restriction 2468 @param[in] basis_coarse Coarse grid active vector basis 2469 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2470 @param[out] op_coarse Coarse grid operator 2471 @param[out] op_prolong Coarse to fine operator, or NULL 2472 @param[out] op_restrict Fine to coarse operator, or NULL 2473 2474 @return An error code: 0 - success, otherwise - failure 2475 2476 @ref User 2477 **/ 2478 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2479 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2480 CeedOperator *op_restrict) { 2481 Ceed ceed; 2482 CeedInt Q_f, Q_c; 2483 CeedBasis basis_fine, basis_c_to_f = NULL; 2484 2485 CeedCall(CeedOperatorCheckReady(op_fine)); 2486 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2487 2488 // Check for compatible quadrature spaces 2489 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2490 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2491 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2492 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2493 2494 // Coarse to fine basis 2495 if (op_prolong || op_restrict) { 2496 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2497 CeedScalar *q_ref, *q_weight, *grad; 2498 CeedElemTopology topo; 2499 2500 // Check if interpolation matrix is provided 2501 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2502 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2503 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2504 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2505 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2506 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2507 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2508 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2509 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2510 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2511 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2512 CeedCall(CeedFree(&q_ref)); 2513 CeedCall(CeedFree(&q_weight)); 2514 CeedCall(CeedFree(&grad)); 2515 } 2516 2517 // Core code 2518 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2519 return CEED_ERROR_SUCCESS; 2520 } 2521 2522 /** 2523 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2524 2525 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2526 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2527 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2528 \hat S V\f$. 2529 The CeedOperator must be linear and non-composite. 2530 The associated CeedQFunction must therefore also be linear. 2531 2532 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2533 2534 @param[in] op CeedOperator to create element inverses 2535 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2536 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2537 2538 @return An error code: 0 - success, otherwise - failure 2539 2540 @ref User 2541 **/ 2542 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2543 Ceed ceed, ceed_parent; 2544 bool interp = false, grad = false, is_tensor_basis = true; 2545 CeedInt num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2546 CeedSize l_size = 1; 2547 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg; 2548 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2549 CeedVector q_data; 2550 CeedElemRestriction rstr = NULL, rstr_qd_i; 2551 CeedBasis basis = NULL, fdm_basis; 2552 CeedQFunctionContext ctx_fdm; 2553 CeedQFunctionField *qf_fields; 2554 CeedQFunction qf, qf_fdm; 2555 CeedOperatorField *op_fields; 2556 2557 CeedCall(CeedOperatorCheckReady(op)); 2558 2559 if (op->CreateFDMElementInverse) { 2560 // Backend version 2561 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2562 return CEED_ERROR_SUCCESS; 2563 } else { 2564 // Operator fallback 2565 CeedOperator op_fallback; 2566 2567 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2568 if (op_fallback) { 2569 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2570 return CEED_ERROR_SUCCESS; 2571 } 2572 } 2573 2574 // Default interface implementation 2575 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2576 CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent)); 2577 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2578 2579 // Determine active input basis 2580 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2581 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2582 for (CeedInt i = 0; i < num_input_fields; i++) { 2583 CeedVector vec; 2584 2585 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2586 if (vec == CEED_VECTOR_ACTIVE) { 2587 CeedEvalMode eval_mode; 2588 2589 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2590 interp = interp || eval_mode == CEED_EVAL_INTERP; 2591 grad = grad || eval_mode == CEED_EVAL_GRAD; 2592 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2593 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2594 } 2595 } 2596 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2597 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2598 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2599 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2600 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2601 CeedCall(CeedBasisGetDimension(basis, &dim)); 2602 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2603 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2604 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2605 2606 // Build and diagonalize 1D Mass and Laplacian 2607 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2608 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2609 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2610 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2611 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2612 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2613 CeedCall(CeedCalloc(P_1d, &lambda)); 2614 // -- Build matrices 2615 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2616 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2617 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2618 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2619 2620 // -- Diagonalize 2621 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2622 CeedCall(CeedFree(&mass)); 2623 CeedCall(CeedFree(&laplace)); 2624 for (CeedInt i = 0; i < P_1d; i++) { 2625 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2626 } 2627 CeedCall(CeedFree(&x)); 2628 2629 { 2630 CeedInt layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2631 CeedScalar max_norm = 0; 2632 const CeedScalar *assembled_array, *q_weight_array; 2633 CeedVector assembled = NULL, q_weight; 2634 CeedElemRestriction rstr_qf = NULL; 2635 2636 // Assemble QFunction 2637 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2638 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2639 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2640 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2641 2642 // Calculate element averages 2643 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2644 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2645 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2646 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2647 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2648 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2649 2650 for (CeedInt e = 0; e < num_elem; e++) { 2651 CeedInt count = 0; 2652 2653 for (CeedInt q = 0; q < num_qpts; q++) { 2654 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2655 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2656 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2657 count++; 2658 } 2659 } 2660 } 2661 if (count) { 2662 elem_avg[e] /= count; 2663 } else { 2664 elem_avg[e] = 1.0; 2665 } 2666 } 2667 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2668 CeedCall(CeedVectorDestroy(&assembled)); 2669 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2670 CeedCall(CeedVectorDestroy(&q_weight)); 2671 } 2672 2673 // Build FDM diagonal 2674 { 2675 CeedScalar *q_data_array, *fdm_diagonal; 2676 2677 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2678 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2679 for (CeedInt c = 0; c < num_comp; c++) { 2680 for (CeedInt n = 0; n < num_nodes; n++) { 2681 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2682 if (grad) { 2683 for (CeedInt d = 0; d < dim; d++) { 2684 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2685 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2686 } 2687 } 2688 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2689 } 2690 } 2691 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2692 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2693 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2694 for (CeedInt e = 0; e < num_elem; e++) { 2695 for (CeedInt c = 0; c < num_comp; c++) { 2696 for (CeedInt n = 0; n < num_nodes; n++) 2697 q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2698 } 2699 } 2700 CeedCall(CeedFree(&elem_avg)); 2701 CeedCall(CeedFree(&fdm_diagonal)); 2702 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2703 } 2704 2705 // Setup FDM operator 2706 // -- Basis 2707 { 2708 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2709 2710 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2711 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2712 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2713 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2714 CeedCall(CeedFree(&fdm_interp)); 2715 CeedCall(CeedFree(&grad_dummy)); 2716 CeedCall(CeedFree(&q_ref_dummy)); 2717 CeedCall(CeedFree(&q_weight_dummy)); 2718 CeedCall(CeedFree(&lambda)); 2719 } 2720 2721 // -- Restriction 2722 { 2723 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2724 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2725 } 2726 2727 // -- QFunction 2728 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2729 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2730 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2731 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2732 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2733 2734 // -- QFunction context 2735 { 2736 CeedInt *num_comp_data; 2737 2738 CeedCall(CeedCalloc(1, &num_comp_data)); 2739 num_comp_data[0] = num_comp; 2740 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2741 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2742 } 2743 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2744 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2745 2746 // -- Operator 2747 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2748 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2749 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data)); 2750 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2751 2752 // Cleanup 2753 CeedCall(CeedVectorDestroy(&q_data)); 2754 CeedCall(CeedBasisDestroy(&fdm_basis)); 2755 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2756 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2757 return CEED_ERROR_SUCCESS; 2758 } 2759 2760 /// @} 2761