1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] basis CeedBasis from which to get the basis matrix 184 @param[in] eval_mode Current basis evaluation mode 185 @param[in] identity Pointer to identity matrix 186 @param[out] basis_ptr Basis pointer to set 187 188 @ref Developer 189 **/ 190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 191 switch (eval_mode) { 192 case CEED_EVAL_NONE: 193 *basis_ptr = identity; 194 break; 195 case CEED_EVAL_INTERP: 196 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 197 break; 198 case CEED_EVAL_GRAD: 199 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 200 break; 201 case CEED_EVAL_DIV: 202 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 203 break; 204 case CEED_EVAL_CURL: 205 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 206 break; 207 case CEED_EVAL_WEIGHT: 208 break; // Caught by QF Assembly 209 } 210 assert(*basis_ptr != NULL); 211 212 return CEED_ERROR_SUCCESS; 213 } 214 215 /** 216 @brief Create point block restriction for active operator field 217 218 @param[in] rstr Original CeedElemRestriction for active field 219 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 220 221 @return An error code: 0 - success, otherwise - failure 222 223 @ref Developer 224 **/ 225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 226 Ceed ceed; 227 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 228 const CeedInt *offsets; 229 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 230 231 // Expand offsets 232 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 233 CeedSize l_size; 234 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 235 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 236 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 237 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 238 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 239 CeedInt shift = num_comp; 240 if (comp_stride != 1) shift *= num_comp; 241 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 242 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 243 pointblock_offsets[i] = offsets[i] * shift; 244 } 245 246 // Create new restriction 247 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 248 pointblock_offsets, pointblock_rstr)); 249 250 // Cleanup 251 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 252 253 return CEED_ERROR_SUCCESS; 254 } 255 256 /** 257 @brief Core logic for assembling operator diagonal or point block diagonal 258 259 @param[in] op CeedOperator to assemble point block diagonal 260 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 261 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 262 @param[out] assembled CeedVector to store assembled diagonal 263 264 @return An error code: 0 - success, otherwise - failure 265 266 @ref Developer 267 **/ 268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 269 Ceed ceed; 270 CeedCall(CeedOperatorGetCeed(op, &ceed)); 271 272 // Assemble QFunction 273 CeedQFunction qf; 274 const CeedScalar *assembled_qf_array; 275 CeedVector assembled_qf; 276 CeedElemRestriction assembled_elem_rstr; 277 CeedInt num_input_fields, num_output_fields; 278 CeedInt layout[3]; 279 280 CeedCall(CeedOperatorGetQFunction(op, &qf)); 281 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 282 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 283 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 284 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 285 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 286 287 // Get assembly data 288 CeedOperatorAssemblyData data; 289 const CeedEvalMode **eval_modes_in, **eval_modes_out; 290 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 291 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 292 CeedBasis *active_bases; 293 CeedElemRestriction *active_elem_rstrs; 294 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 295 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 296 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 297 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 298 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 299 300 // Loop over all active bases 301 for (CeedInt b = 0; b < num_active_bases; b++) { 302 // Assemble point block diagonal restriction, if needed 303 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 304 305 if (is_pointblock) { 306 CeedElemRestriction point_block_elem_rstr; 307 308 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 309 diag_elem_rstr = point_block_elem_rstr; 310 } 311 312 // Create diagonal vector 313 CeedVector elem_diag; 314 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 315 316 // Assemble element operator diagonals 317 CeedScalar *elem_diag_array; 318 CeedInt num_elem, num_nodes, num_qpts, num_components; 319 320 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 321 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 322 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 323 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 324 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 325 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 326 327 // Construct identity matrix for basis if required 328 bool has_eval_none = false; 329 CeedScalar *identity = NULL; 330 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 331 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 332 } 333 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 334 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 335 } 336 if (has_eval_none) { 337 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 338 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 339 } 340 341 // Compute the diagonal of B^T D B 342 // Each element 343 for (CeedInt e = 0; e < num_elem; e++) { 344 // Each basis eval mode pair 345 CeedInt d_out = 0, q_comp_out; 346 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 347 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 348 const CeedScalar *B_t = NULL; 349 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 350 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 351 if (q_comp_out > 1) { 352 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 353 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 354 } 355 eval_mode_out_prev = eval_modes_out[b][e_out]; 356 357 CeedInt d_in = 0, q_comp_in; 358 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 359 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 360 const CeedScalar *B = NULL; 361 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 362 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 363 if (q_comp_in > 1) { 364 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 365 else B = &B[(++d_in) * num_qpts * num_nodes]; 366 } 367 eval_mode_in_prev = eval_modes_in[b][e_in]; 368 369 // Each component 370 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 371 // Each qpt/node pair 372 for (CeedInt q = 0; q < num_qpts; q++) { 373 if (is_pointblock) { 374 // Point Block Diagonal 375 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 376 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 377 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 378 for (CeedInt n = 0; n < num_nodes; n++) { 379 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 380 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 381 } 382 } 383 } else { 384 // Diagonal Only 385 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 386 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 387 for (CeedInt n = 0; n < num_nodes; n++) { 388 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 389 } 390 } 391 } 392 } 393 } 394 } 395 } 396 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 397 398 // Assemble local operator diagonal 399 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 400 401 // Cleanup 402 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 403 CeedCall(CeedVectorDestroy(&elem_diag)); 404 CeedCall(CeedFree(&identity)); 405 } 406 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 407 CeedCall(CeedVectorDestroy(&assembled_qf)); 408 409 return CEED_ERROR_SUCCESS; 410 } 411 412 /** 413 @brief Core logic for assembling composite operator diagonal 414 415 @param[in] op CeedOperator to assemble point block diagonal 416 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 417 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 418 @param[out] assembled CeedVector to store assembled diagonal 419 420 @return An error code: 0 - success, otherwise - failure 421 422 @ref Developer 423 **/ 424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 CeedInt num_sub; 427 CeedOperator *suboperators; 428 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 429 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 430 for (CeedInt i = 0; i < num_sub; i++) { 431 if (is_pointblock) { 432 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 433 } else { 434 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 435 } 436 } 437 return CEED_ERROR_SUCCESS; 438 } 439 440 /** 441 @brief Build nonzero pattern for non-composite operator 442 443 Users should generally use CeedOperatorLinearAssembleSymbolic() 444 445 @param[in] op CeedOperator to assemble nonzero pattern 446 @param[in] offset Offset for number of entries 447 @param[out] rows Row number for each entry 448 @param[out] cols Column number for each entry 449 450 @return An error code: 0 - success, otherwise - failure 451 452 @ref Developer 453 **/ 454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 455 Ceed ceed; 456 bool is_composite; 457 CeedCall(CeedOperatorGetCeed(op, &ceed)); 458 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 459 460 if (is_composite) { 461 // LCOV_EXCL_START 462 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 463 // LCOV_EXCL_STOP 464 } 465 466 CeedSize num_nodes; 467 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 468 CeedElemRestriction rstr_in; 469 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 470 CeedInt num_elem, elem_size, num_comp; 471 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 472 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 473 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 474 CeedInt layout_er[3]; 475 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 476 477 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 478 479 // Determine elem_dof relation 480 CeedVector index_vec; 481 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 482 CeedScalar *array; 483 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 484 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 485 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 486 CeedVector elem_dof; 487 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 488 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 489 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 490 const CeedScalar *elem_dof_a; 491 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 492 CeedCall(CeedVectorDestroy(&index_vec)); 493 494 // Determine i, j locations for element matrices 495 CeedInt count = 0; 496 for (CeedInt e = 0; e < num_elem; e++) { 497 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 498 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 499 for (CeedInt i = 0; i < elem_size; i++) { 500 for (CeedInt j = 0; j < elem_size; j++) { 501 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 502 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 503 504 const CeedInt row = elem_dof_a[elem_dof_index_row]; 505 const CeedInt col = elem_dof_a[elem_dof_index_col]; 506 507 rows[offset + count] = row; 508 cols[offset + count] = col; 509 count++; 510 } 511 } 512 } 513 } 514 } 515 if (count != local_num_entries) { 516 // LCOV_EXCL_START 517 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 // LCOV_EXCL_STOP 519 } 520 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 521 CeedCall(CeedVectorDestroy(&elem_dof)); 522 523 return CEED_ERROR_SUCCESS; 524 } 525 526 /** 527 @brief Assemble nonzero entries for non-composite operator 528 529 Users should generally use CeedOperatorLinearAssemble() 530 531 @param[in] op CeedOperator to assemble 532 @param[in] offset Offset for number of entries 533 @param[out] values Values to assemble into matrix 534 535 @return An error code: 0 - success, otherwise - failure 536 537 @ref Developer 538 **/ 539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 540 Ceed ceed; 541 bool is_composite; 542 CeedCall(CeedOperatorGetCeed(op, &ceed)); 543 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 544 545 if (is_composite) { 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 548 // LCOV_EXCL_STOP 549 } 550 551 // Early exit for empty operator 552 { 553 CeedInt num_elem = 0; 554 555 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 556 if (num_elem == 0) return CEED_ERROR_SUCCESS; 557 } 558 559 if (op->LinearAssembleSingle) { 560 // Backend version 561 CeedCall(op->LinearAssembleSingle(op, offset, values)); 562 return CEED_ERROR_SUCCESS; 563 } else { 564 // Operator fallback 565 CeedOperator op_fallback; 566 567 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 568 if (op_fallback) { 569 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 570 return CEED_ERROR_SUCCESS; 571 } 572 } 573 574 // Assemble QFunction 575 CeedQFunction qf; 576 CeedCall(CeedOperatorGetQFunction(op, &qf)); 577 CeedVector assembled_qf; 578 CeedElemRestriction rstr_q; 579 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 580 CeedSize qf_length; 581 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 582 583 CeedInt num_input_fields, num_output_fields; 584 CeedOperatorField *input_fields; 585 CeedOperatorField *output_fields; 586 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 587 588 // Get assembly data 589 CeedOperatorAssemblyData data; 590 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 591 const CeedEvalMode **eval_modes_in, **eval_modes_out; 592 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 593 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 594 &eval_modes_out, NULL, NULL)); 595 CeedBasis *bases; 596 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 597 CeedBasis basis_in = bases[0]; 598 599 if (num_active_bases > 1) { 600 // LCOV_EXCL_START 601 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 602 // LCOV_EXCL_STOP 603 } 604 if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) { 605 // LCOV_EXCL_START 606 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 607 // LCOV_EXCL_STOP 608 } 609 610 CeedElemRestriction active_rstr; 611 CeedInt num_elem, elem_size, num_qpts, num_comp; 612 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 613 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 614 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 615 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 616 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 617 618 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 619 620 // loop over elements and put in data structure 621 const CeedScalar *assembled_qf_array; 622 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 623 624 CeedInt layout_qf[3]; 625 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 626 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 627 628 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 629 const CeedScalar **B_mats_in, **B_mats_out; 630 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 631 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 632 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 633 CeedScalar elem_mat[elem_size * elem_size]; 634 CeedInt count = 0; 635 CeedScalar *vals; 636 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 637 for (CeedInt e = 0; e < num_elem; e++) { 638 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 639 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 640 // Compute B^T*D 641 for (CeedInt n = 0; n < elem_size; n++) { 642 for (CeedInt q = 0; q < num_qpts; q++) { 643 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 644 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 645 CeedScalar sum = 0.0; 646 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 647 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 648 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 649 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 650 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 651 } 652 BTD_mat[btd_index] = sum; 653 } 654 } 655 } 656 // form element matrix itself (for each block component) 657 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 658 659 // put element matrix in coordinate data structure 660 for (CeedInt i = 0; i < elem_size; i++) { 661 for (CeedInt j = 0; j < elem_size; j++) { 662 vals[offset + count] = elem_mat[i * elem_size + j]; 663 count++; 664 } 665 } 666 } 667 } 668 } 669 if (count != local_num_entries) { 670 // LCOV_EXCL_START 671 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 672 // LCOV_EXCL_STOP 673 } 674 CeedCall(CeedVectorRestoreArray(values, &vals)); 675 676 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 677 CeedCall(CeedVectorDestroy(&assembled_qf)); 678 679 return CEED_ERROR_SUCCESS; 680 } 681 682 /** 683 @brief Count number of entries for assembled CeedOperator 684 685 @param[in] op CeedOperator to assemble 686 @param[out] num_entries Number of entries in assembled representation 687 688 @return An error code: 0 - success, otherwise - failure 689 690 @ref Utility 691 **/ 692 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 693 bool is_composite; 694 CeedElemRestriction rstr; 695 CeedInt num_elem, elem_size, num_comp; 696 697 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 698 if (is_composite) { 699 // LCOV_EXCL_START 700 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 701 // LCOV_EXCL_STOP 702 } 703 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 704 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 705 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 706 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 707 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 708 709 return CEED_ERROR_SUCCESS; 710 } 711 712 /** 713 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 714 715 @param[in] op_fine Fine grid operator 716 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 717 @param[in] rstr_coarse Coarse grid restriction 718 @param[in] basis_coarse Coarse grid active vector basis 719 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 720 @param[out] op_coarse Coarse grid operator 721 @param[out] op_prolong Coarse to fine operator, or NULL 722 @param[out] op_restrict Fine to coarse operator, or NULL 723 724 @return An error code: 0 - success, otherwise - failure 725 726 @ref Developer 727 **/ 728 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 729 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 730 Ceed ceed; 731 CeedVector mult_vec = NULL; 732 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 733 734 // Check for composite operator 735 bool is_composite; 736 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 737 if (is_composite) { 738 // LCOV_EXCL_START 739 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 740 // LCOV_EXCL_STOP 741 } 742 743 // Coarse Grid 744 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 745 CeedElemRestriction rstr_fine = NULL; 746 // -- Clone input fields 747 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 748 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 749 rstr_fine = op_fine->input_fields[i]->elem_rstr; 750 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 751 } else { 752 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 753 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 754 } 755 } 756 // -- Clone output fields 757 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 758 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 759 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 760 } else { 761 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 762 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 763 } 764 } 765 // -- Clone QFunctionAssemblyData 766 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 767 768 // Multiplicity vector 769 if (op_restrict || op_prolong) { 770 CeedVector mult_e_vec; 771 772 if (!p_mult_fine) { 773 // LCOV_EXCL_START 774 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 775 // LCOV_EXCL_STOP 776 } 777 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 778 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 779 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 780 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 781 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 782 CeedCall(CeedVectorDestroy(&mult_e_vec)); 783 CeedCall(CeedVectorReciprocal(mult_vec)); 784 } 785 786 // Clone name 787 bool has_name = op_fine->name; 788 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 789 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 790 791 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 792 if ((op_restrict || op_prolong) && !basis_c_to_f) { 793 // LCOV_EXCL_START 794 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis"); 795 // LCOV_EXCL_STOP 796 } 797 798 // Restriction/Prolongation Operators 799 CeedInt num_comp; 800 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 801 802 // Restriction 803 if (op_restrict) { 804 CeedInt *num_comp_r_data; 805 CeedQFunction qf_restrict; 806 CeedQFunctionContext ctx_r; 807 808 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 809 CeedCall(CeedCalloc(1, &num_comp_r_data)); 810 num_comp_r_data[0] = num_comp; 811 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 812 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 813 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 814 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 815 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 816 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 817 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 818 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 819 820 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 821 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 822 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 823 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 824 825 // Set name 826 char *restriction_name; 827 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 828 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 829 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 830 CeedCall(CeedFree(&restriction_name)); 831 832 // Check 833 CeedCall(CeedOperatorCheckReady(*op_restrict)); 834 835 // Cleanup 836 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 837 } 838 839 // Prolongation 840 if (op_prolong) { 841 CeedInt *num_comp_p_data; 842 CeedQFunction qf_prolong; 843 CeedQFunctionContext ctx_p; 844 845 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 846 CeedCall(CeedCalloc(1, &num_comp_p_data)); 847 num_comp_p_data[0] = num_comp; 848 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 849 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 850 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 851 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 852 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 853 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 854 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 855 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 856 857 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 858 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 859 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 860 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 861 862 // Set name 863 char *prolongation_name; 864 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 865 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 866 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 867 CeedCall(CeedFree(&prolongation_name)); 868 869 // Check 870 CeedCall(CeedOperatorCheckReady(*op_prolong)); 871 872 // Cleanup 873 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 874 } 875 876 // Check 877 CeedCall(CeedOperatorCheckReady(*op_coarse)); 878 879 // Cleanup 880 CeedCall(CeedVectorDestroy(&mult_vec)); 881 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 882 883 return CEED_ERROR_SUCCESS; 884 } 885 886 /** 887 @brief Build 1D mass matrix and Laplacian with perturbation 888 889 @param[in] interp_1d Interpolation matrix in one dimension 890 @param[in] grad_1d Gradient matrix in one dimension 891 @param[in] q_weight_1d Quadrature weights in one dimension 892 @param[in] P_1d Number of basis nodes in one dimension 893 @param[in] Q_1d Number of quadrature points in one dimension 894 @param[in] dim Dimension of basis 895 @param[out] mass Assembled mass matrix in one dimension 896 @param[out] laplace Assembled perturbed Laplacian in one dimension 897 898 @return An error code: 0 - success, otherwise - failure 899 900 @ref Developer 901 **/ 902 CeedPragmaOptimizeOff 903 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 904 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 905 for (CeedInt i = 0; i < P_1d; i++) { 906 for (CeedInt j = 0; j < P_1d; j++) { 907 CeedScalar sum = 0.0; 908 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 909 mass[i + j * P_1d] = sum; 910 } 911 } 912 // -- Laplacian 913 for (CeedInt i = 0; i < P_1d; i++) { 914 for (CeedInt j = 0; j < P_1d; j++) { 915 CeedScalar sum = 0.0; 916 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 917 laplace[i + j * P_1d] = sum; 918 } 919 } 920 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 921 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 922 return CEED_ERROR_SUCCESS; 923 } 924 CeedPragmaOptimizeOn 925 926 /// @} 927 928 /// ---------------------------------------------------------------------------- 929 /// CeedOperator Backend API 930 /// ---------------------------------------------------------------------------- 931 /// @addtogroup CeedOperatorBackend 932 /// @{ 933 934 /** 935 @brief Create object holding CeedQFunction assembly data for CeedOperator 936 937 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 938 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 939 940 @return An error code: 0 - success, otherwise - failure 941 942 @ref Backend 943 **/ 944 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 945 CeedCall(CeedCalloc(1, data)); 946 (*data)->ref_count = 1; 947 (*data)->ceed = ceed; 948 CeedCall(CeedReference(ceed)); 949 950 return CEED_ERROR_SUCCESS; 951 } 952 953 /** 954 @brief Increment the reference counter for a CeedQFunctionAssemblyData 955 956 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 957 958 @return An error code: 0 - success, otherwise - failure 959 960 @ref Backend 961 **/ 962 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 963 data->ref_count++; 964 return CEED_ERROR_SUCCESS; 965 } 966 967 /** 968 @brief Set re-use of CeedQFunctionAssemblyData 969 970 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 971 @param[in] reuse_data Boolean flag indicating data re-use 972 973 @return An error code: 0 - success, otherwise - failure 974 975 @ref Backend 976 **/ 977 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 978 data->reuse_data = reuse_data; 979 data->needs_data_update = true; 980 return CEED_ERROR_SUCCESS; 981 } 982 983 /** 984 @brief Mark QFunctionAssemblyData as stale 985 986 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 987 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 988 989 @return An error code: 0 - success, otherwise - failure 990 991 @ref Backend 992 **/ 993 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 994 data->needs_data_update = needs_data_update; 995 return CEED_ERROR_SUCCESS; 996 } 997 998 /** 999 @brief Determine if QFunctionAssemblyData needs update 1000 1001 @param[in] data CeedQFunctionAssemblyData to mark as stale 1002 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1003 1004 @return An error code: 0 - success, otherwise - failure 1005 1006 @ref Backend 1007 **/ 1008 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1009 *is_update_needed = !data->reuse_data || data->needs_data_update; 1010 return CEED_ERROR_SUCCESS; 1011 } 1012 1013 /** 1014 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1015 1016 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1017 1018 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1019 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1020 CeedQFunctionAssemblyData. 1021 1022 @param[in] data CeedQFunctionAssemblyData to copy reference to 1023 @param[in,out] data_copy Variable to store copied reference 1024 1025 @return An error code: 0 - success, otherwise - failure 1026 1027 @ref Backend 1028 **/ 1029 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1030 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1031 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1032 *data_copy = data; 1033 return CEED_ERROR_SUCCESS; 1034 } 1035 1036 /** 1037 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1038 1039 @param[in] data CeedQFunctionAssemblyData to retrieve status 1040 @param[out] is_setup Boolean flag for setup status 1041 1042 @return An error code: 0 - success, otherwise - failure 1043 1044 @ref Backend 1045 **/ 1046 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1047 *is_setup = data->is_setup; 1048 return CEED_ERROR_SUCCESS; 1049 } 1050 1051 /** 1052 @brief Set internal objects for CeedQFunctionAssemblyData 1053 1054 @param[in,out] data CeedQFunctionAssemblyData to set objects 1055 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1056 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1057 1058 @return An error code: 0 - success, otherwise - failure 1059 1060 @ref Backend 1061 **/ 1062 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1063 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1064 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1065 1066 data->is_setup = true; 1067 return CEED_ERROR_SUCCESS; 1068 } 1069 1070 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1071 if (!data->is_setup) { 1072 // LCOV_EXCL_START 1073 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1074 // LCOV_EXCL_STOP 1075 } 1076 1077 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1078 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1079 1080 return CEED_ERROR_SUCCESS; 1081 } 1082 1083 /** 1084 @brief Destroy CeedQFunctionAssemblyData 1085 1086 @param[in,out] data CeedQFunctionAssemblyData to destroy 1087 1088 @return An error code: 0 - success, otherwise - failure 1089 1090 @ref Backend 1091 **/ 1092 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1093 if (!*data || --(*data)->ref_count > 0) { 1094 *data = NULL; 1095 return CEED_ERROR_SUCCESS; 1096 } 1097 CeedCall(CeedDestroy(&(*data)->ceed)); 1098 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1099 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1100 1101 CeedCall(CeedFree(data)); 1102 return CEED_ERROR_SUCCESS; 1103 } 1104 1105 /** 1106 @brief Get CeedOperatorAssemblyData 1107 1108 @param[in] op CeedOperator to assemble 1109 @param[out] data CeedQFunctionAssemblyData 1110 1111 @return An error code: 0 - success, otherwise - failure 1112 1113 @ref Backend 1114 **/ 1115 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1116 if (!op->op_assembled) { 1117 CeedOperatorAssemblyData data; 1118 1119 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1120 op->op_assembled = data; 1121 } 1122 *data = op->op_assembled; 1123 1124 return CEED_ERROR_SUCCESS; 1125 } 1126 1127 /** 1128 @brief Create object holding CeedOperator assembly data. 1129 1130 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1131 An array with references to the corresponding active CeedElemRestrictions is also stored. 1132 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1133 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1134 CeedEvalMode. 1135 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1136 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1137 1138 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1139 @param[in] op CeedOperator to be assembled 1140 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1141 1142 @return An error code: 0 - success, otherwise - failure 1143 1144 @ref Backend 1145 **/ 1146 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1147 CeedInt num_active_bases = 0; 1148 1149 // Allocate 1150 CeedCall(CeedCalloc(1, data)); 1151 (*data)->ceed = ceed; 1152 CeedCall(CeedReference(ceed)); 1153 1154 // Build OperatorAssembly data 1155 CeedQFunction qf; 1156 CeedQFunctionField *qf_fields; 1157 CeedOperatorField *op_fields; 1158 CeedInt num_input_fields; 1159 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1160 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1161 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1162 1163 // Determine active input basis 1164 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1165 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1166 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1167 for (CeedInt i = 0; i < num_input_fields; i++) { 1168 CeedVector vec; 1169 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1170 if (vec == CEED_VECTOR_ACTIVE) { 1171 CeedBasis basis_in = NULL; 1172 CeedEvalMode eval_mode; 1173 CeedInt index = -1, dim, num_comp, q_comp; 1174 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1175 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1176 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1177 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1178 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1179 for (CeedInt i = 0; i < num_active_bases; i++) { 1180 if ((*data)->active_bases[i] == basis_in) index = i; 1181 } 1182 if (index == -1) { 1183 CeedElemRestriction elem_rstr_in; 1184 index = num_active_bases; 1185 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1186 (*data)->active_bases[num_active_bases] = NULL; 1187 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1188 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1189 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1190 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1191 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1192 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1193 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1194 num_eval_modes_in[index] = 0; 1195 num_eval_modes_out[index] = 0; 1196 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1197 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1198 eval_modes_in[index] = NULL; 1199 eval_modes_out[index] = NULL; 1200 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1201 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1202 eval_mode_offsets_in[index] = NULL; 1203 eval_mode_offsets_out[index] = NULL; 1204 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1205 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1206 (*data)->assembled_bases_in[index] = NULL; 1207 (*data)->assembled_bases_out[index] = NULL; 1208 num_active_bases++; 1209 } 1210 if (eval_mode != CEED_EVAL_WEIGHT) { 1211 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1212 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1213 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1214 for (CeedInt d = 0; d < q_comp; d++) { 1215 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1216 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1217 offset += num_comp; 1218 } 1219 num_eval_modes_in[index] += q_comp; 1220 } 1221 } 1222 } 1223 (*data)->num_eval_modes_in = num_eval_modes_in; 1224 (*data)->eval_modes_in = eval_modes_in; 1225 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1226 1227 // Determine active output basis 1228 CeedInt num_output_fields; 1229 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1230 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1231 offset = 0; 1232 for (CeedInt i = 0; i < num_output_fields; i++) { 1233 CeedVector vec; 1234 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1235 if (vec == CEED_VECTOR_ACTIVE) { 1236 CeedBasis basis_out = NULL; 1237 CeedEvalMode eval_mode; 1238 CeedInt index = -1, dim, num_comp, q_comp; 1239 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1240 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1241 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1242 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1243 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1244 for (CeedInt i = 0; i < num_active_bases; i++) { 1245 if ((*data)->active_bases[i] == basis_out) index = i; 1246 } 1247 if (index == -1) { 1248 CeedElemRestriction elem_rstr_out; 1249 1250 index = num_active_bases; 1251 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1252 (*data)->active_bases[num_active_bases] = NULL; 1253 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1254 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1255 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1256 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1257 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1258 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1259 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1260 num_eval_modes_in[index] = 0; 1261 num_eval_modes_out[index] = 0; 1262 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1263 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1264 eval_modes_in[index] = NULL; 1265 eval_modes_out[index] = NULL; 1266 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1267 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1268 eval_mode_offsets_in[index] = NULL; 1269 eval_mode_offsets_out[index] = NULL; 1270 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1271 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1272 (*data)->assembled_bases_in[index] = NULL; 1273 (*data)->assembled_bases_out[index] = NULL; 1274 num_active_bases++; 1275 } 1276 if (eval_mode != CEED_EVAL_WEIGHT) { 1277 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1278 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1279 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1280 for (CeedInt d = 0; d < q_comp; d++) { 1281 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1282 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1283 offset += num_comp; 1284 } 1285 num_eval_modes_out[index] += q_comp; 1286 } 1287 } 1288 } 1289 (*data)->num_output_components = offset; 1290 (*data)->num_eval_modes_out = num_eval_modes_out; 1291 (*data)->eval_modes_out = eval_modes_out; 1292 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1293 (*data)->num_active_bases = num_active_bases; 1294 1295 return CEED_ERROR_SUCCESS; 1296 } 1297 1298 /** 1299 @brief Get CeedOperator CeedEvalModes for assembly. 1300 1301 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1302 1303 @param[in] data CeedOperatorAssemblyData 1304 @param[out] num_active_bases Total number of active bases 1305 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1306 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1307 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1308 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1309 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1310 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1311 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1312 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1313 including contributions of all active bases 1314 1315 @return An error code: 0 - success, otherwise - failure 1316 1317 1318 @ref Backend 1319 **/ 1320 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1321 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1322 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1323 if (num_active_bases) *num_active_bases = data->num_active_bases; 1324 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1325 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1326 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1327 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1328 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1329 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1330 if (num_output_components) *num_output_components = data->num_output_components; 1331 1332 return CEED_ERROR_SUCCESS; 1333 } 1334 1335 /** 1336 @brief Get CeedOperator CeedBasis data for assembly. 1337 1338 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1339 1340 @param[in] data CeedOperatorAssemblyData 1341 @param[out] num_active_bases Number of active bases, or NULL 1342 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1343 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1344 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1345 1346 @return An error code: 0 - success, otherwise - failure 1347 1348 @ref Backend 1349 **/ 1350 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1351 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1352 // Assemble B_in, B_out if needed 1353 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1354 CeedInt num_qpts; 1355 1356 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1357 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1358 CeedInt num_nodes; 1359 CeedScalar *B_in = NULL, *identity = NULL; 1360 bool has_eval_none = false; 1361 1362 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1363 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1364 1365 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1366 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1367 } 1368 if (has_eval_none) { 1369 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1370 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1371 identity[i * num_nodes + i] = 1.0; 1372 } 1373 } 1374 1375 for (CeedInt q = 0; q < num_qpts; q++) { 1376 for (CeedInt n = 0; n < num_nodes; n++) { 1377 CeedInt d_in = 0, q_comp_in; 1378 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1379 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1380 const CeedInt qq = data->num_eval_modes_in[b] * q; 1381 const CeedScalar *B = NULL; 1382 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1383 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1384 if (q_comp_in > 1) { 1385 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1386 else B = &B[(++d_in) * num_qpts * num_nodes]; 1387 } 1388 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1389 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1390 } 1391 } 1392 } 1393 if (identity) CeedCall(CeedFree(identity)); 1394 data->assembled_bases_in[b] = B_in; 1395 } 1396 } 1397 1398 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1399 CeedInt num_qpts; 1400 1401 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1402 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1403 CeedInt num_nodes; 1404 bool has_eval_none = false; 1405 CeedScalar *B_out = NULL, *identity = NULL; 1406 1407 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1408 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1409 1410 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1411 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1412 } 1413 if (has_eval_none) { 1414 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1415 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1416 identity[i * num_nodes + i] = 1.0; 1417 } 1418 } 1419 1420 for (CeedInt q = 0; q < num_qpts; q++) { 1421 for (CeedInt n = 0; n < num_nodes; n++) { 1422 CeedInt d_out = 0, q_comp_out; 1423 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1424 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1425 const CeedInt qq = data->num_eval_modes_out[b] * q; 1426 const CeedScalar *B = NULL; 1427 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1428 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1429 if (q_comp_out > 1) { 1430 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1431 else B = &B[(++d_out) * num_qpts * num_nodes]; 1432 } 1433 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1434 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1435 } 1436 } 1437 } 1438 if (identity) CeedCall(CeedFree(identity)); 1439 data->assembled_bases_out[b] = B_out; 1440 } 1441 } 1442 1443 // Pass out assembled data 1444 if (active_bases) *active_bases = data->active_bases; 1445 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1446 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1447 1448 return CEED_ERROR_SUCCESS; 1449 } 1450 1451 /** 1452 @brief Get CeedOperator CeedBasis data for assembly. 1453 1454 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1455 1456 @param[in] data CeedOperatorAssemblyData 1457 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1458 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1459 1460 @return An error code: 0 - success, otherwise - failure 1461 1462 @ref Backend 1463 **/ 1464 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1465 CeedElemRestriction **active_elem_rstrs) { 1466 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1467 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1468 1469 return CEED_ERROR_SUCCESS; 1470 } 1471 1472 /** 1473 @brief Destroy CeedOperatorAssemblyData 1474 1475 @param[in,out] data CeedOperatorAssemblyData to destroy 1476 1477 @return An error code: 0 - success, otherwise - failure 1478 1479 @ref Backend 1480 **/ 1481 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1482 if (!*data) { 1483 *data = NULL; 1484 return CEED_ERROR_SUCCESS; 1485 } 1486 CeedCall(CeedDestroy(&(*data)->ceed)); 1487 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1488 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1489 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1490 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1491 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1492 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1493 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1494 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1495 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1496 } 1497 CeedCall(CeedFree(&(*data)->active_bases)); 1498 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1499 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1500 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1501 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1502 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1503 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1504 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1505 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1506 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1507 1508 CeedCall(CeedFree(data)); 1509 return CEED_ERROR_SUCCESS; 1510 } 1511 1512 /// @} 1513 1514 /// ---------------------------------------------------------------------------- 1515 /// CeedOperator Public API 1516 /// ---------------------------------------------------------------------------- 1517 /// @addtogroup CeedOperatorUser 1518 /// @{ 1519 1520 /** 1521 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1522 1523 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1524 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1525 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1526 1527 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1528 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1529 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1530 1531 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1532 1533 @param[in] op CeedOperator to assemble CeedQFunction 1534 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1535 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1536 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1537 1538 @return An error code: 0 - success, otherwise - failure 1539 1540 @ref User 1541 **/ 1542 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1543 CeedCall(CeedOperatorCheckReady(op)); 1544 1545 if (op->LinearAssembleQFunction) { 1546 // Backend version 1547 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1548 } else { 1549 // Operator fallback 1550 CeedOperator op_fallback; 1551 1552 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1553 if (op_fallback) { 1554 CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1555 } else { 1556 // LCOV_EXCL_START 1557 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1558 // LCOV_EXCL_STOP 1559 } 1560 } 1561 return CEED_ERROR_SUCCESS; 1562 } 1563 1564 /** 1565 @brief Assemble CeedQFunction and store result internally. 1566 1567 Return copied references of stored data to the caller. 1568 Caller is responsible for ownership and destruction of the copied references. 1569 See also @ref CeedOperatorLinearAssembleQFunction 1570 1571 @param[in] op CeedOperator to assemble CeedQFunction 1572 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1573 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1574 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1575 1576 @return An error code: 0 - success, otherwise - failure 1577 1578 @ref User 1579 **/ 1580 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1581 CeedCall(CeedOperatorCheckReady(op)); 1582 1583 if (op->LinearAssembleQFunctionUpdate) { 1584 // Backend version 1585 bool qf_assembled_is_setup; 1586 CeedVector assembled_vec = NULL; 1587 CeedElemRestriction assembled_rstr = NULL; 1588 1589 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1590 if (qf_assembled_is_setup) { 1591 bool update_needed; 1592 1593 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1594 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1595 if (update_needed) { 1596 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1597 } 1598 } else { 1599 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1600 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1601 } 1602 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1603 1604 // Copy reference from internally held copy 1605 *assembled = NULL; 1606 *rstr = NULL; 1607 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1608 CeedCall(CeedVectorDestroy(&assembled_vec)); 1609 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1610 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1611 } else { 1612 // Operator fallback 1613 CeedOperator op_fallback; 1614 1615 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1616 if (op_fallback) { 1617 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1618 } else { 1619 // LCOV_EXCL_START 1620 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1621 // LCOV_EXCL_STOP 1622 } 1623 } 1624 1625 return CEED_ERROR_SUCCESS; 1626 } 1627 1628 /** 1629 @brief Assemble the diagonal of a square linear CeedOperator 1630 1631 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1632 1633 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1634 1635 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1636 1637 @param[in] op CeedOperator to assemble CeedQFunction 1638 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1639 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1640 1641 @return An error code: 0 - success, otherwise - failure 1642 1643 @ref User 1644 **/ 1645 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1646 bool is_composite; 1647 CeedCall(CeedOperatorCheckReady(op)); 1648 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1649 1650 CeedSize input_size = 0, output_size = 0; 1651 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1652 if (input_size != output_size) { 1653 // LCOV_EXCL_START 1654 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1655 // LCOV_EXCL_STOP 1656 } 1657 1658 // Early exit for empty operator 1659 if (!is_composite) { 1660 CeedInt num_elem = 0; 1661 1662 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1663 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1664 } 1665 1666 if (op->LinearAssembleDiagonal) { 1667 // Backend version 1668 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1669 return CEED_ERROR_SUCCESS; 1670 } else if (op->LinearAssembleAddDiagonal) { 1671 // Backend version with zeroing first 1672 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1673 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1674 return CEED_ERROR_SUCCESS; 1675 } else { 1676 // Operator fallback 1677 CeedOperator op_fallback; 1678 1679 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1680 if (op_fallback) { 1681 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1682 return CEED_ERROR_SUCCESS; 1683 } 1684 } 1685 // Default interface implementation 1686 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1687 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1688 1689 return CEED_ERROR_SUCCESS; 1690 } 1691 1692 /** 1693 @brief Assemble the diagonal of a square linear CeedOperator 1694 1695 This sums into a CeedVector the diagonal of a linear CeedOperator. 1696 1697 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1698 1699 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1700 1701 @param[in] op CeedOperator to assemble CeedQFunction 1702 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1703 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1704 1705 @return An error code: 0 - success, otherwise - failure 1706 1707 @ref User 1708 **/ 1709 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1710 bool is_composite; 1711 CeedCall(CeedOperatorCheckReady(op)); 1712 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1713 1714 CeedSize input_size = 0, output_size = 0; 1715 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1716 if (input_size != output_size) { 1717 // LCOV_EXCL_START 1718 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1719 // LCOV_EXCL_STOP 1720 } 1721 1722 // Early exit for empty operator 1723 if (!is_composite) { 1724 CeedInt num_elem = 0; 1725 1726 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1727 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1728 } 1729 1730 if (op->LinearAssembleAddDiagonal) { 1731 // Backend version 1732 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1733 return CEED_ERROR_SUCCESS; 1734 } else { 1735 // Operator fallback 1736 CeedOperator op_fallback; 1737 1738 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1739 if (op_fallback) { 1740 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1741 return CEED_ERROR_SUCCESS; 1742 } 1743 } 1744 // Default interface implementation 1745 if (is_composite) { 1746 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1747 } else { 1748 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1749 } 1750 1751 return CEED_ERROR_SUCCESS; 1752 } 1753 1754 /** 1755 @brief Assemble the point block diagonal of a square linear CeedOperator 1756 1757 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1758 1759 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1760 1761 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1762 1763 @param[in] op CeedOperator to assemble CeedQFunction 1764 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1765 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1766 component in]. 1767 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1768 1769 @return An error code: 0 - success, otherwise - failure 1770 1771 @ref User 1772 **/ 1773 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1774 bool is_composite; 1775 CeedCall(CeedOperatorCheckReady(op)); 1776 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1777 1778 CeedSize input_size = 0, output_size = 0; 1779 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1780 if (input_size != output_size) { 1781 // LCOV_EXCL_START 1782 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1783 // LCOV_EXCL_STOP 1784 } 1785 1786 // Early exit for empty operator 1787 if (!is_composite) { 1788 CeedInt num_elem = 0; 1789 1790 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1791 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1792 } 1793 1794 if (op->LinearAssemblePointBlockDiagonal) { 1795 // Backend version 1796 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1797 return CEED_ERROR_SUCCESS; 1798 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1799 // Backend version with zeroing first 1800 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1801 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1802 return CEED_ERROR_SUCCESS; 1803 } else { 1804 // Operator fallback 1805 CeedOperator op_fallback; 1806 1807 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1808 if (op_fallback) { 1809 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1810 return CEED_ERROR_SUCCESS; 1811 } 1812 } 1813 // Default interface implementation 1814 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1815 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1816 1817 return CEED_ERROR_SUCCESS; 1818 } 1819 1820 /** 1821 @brief Assemble the point block diagonal of a square linear CeedOperator 1822 1823 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1824 1825 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1826 1827 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1828 1829 @param[in] op CeedOperator to assemble CeedQFunction 1830 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1831 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1832 component in]. 1833 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1834 1835 @return An error code: 0 - success, otherwise - failure 1836 1837 @ref User 1838 **/ 1839 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1840 bool is_composite; 1841 CeedCall(CeedOperatorCheckReady(op)); 1842 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1843 1844 CeedSize input_size = 0, output_size = 0; 1845 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1846 if (input_size != output_size) { 1847 // LCOV_EXCL_START 1848 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1849 // LCOV_EXCL_STOP 1850 } 1851 1852 // Early exit for empty operator 1853 if (!is_composite) { 1854 CeedInt num_elem = 0; 1855 1856 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1857 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1858 } 1859 1860 if (op->LinearAssembleAddPointBlockDiagonal) { 1861 // Backend version 1862 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1863 return CEED_ERROR_SUCCESS; 1864 } else { 1865 // Operator fallback 1866 CeedOperator op_fallback; 1867 1868 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1869 if (op_fallback) { 1870 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1871 return CEED_ERROR_SUCCESS; 1872 } 1873 } 1874 // Default interface implementation 1875 if (is_composite) { 1876 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1877 } else { 1878 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1879 } 1880 1881 return CEED_ERROR_SUCCESS; 1882 } 1883 1884 /** 1885 @brief Fully assemble the nonzero pattern of a linear operator. 1886 1887 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1888 1889 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1890 matrix in entry (i, j). 1891 Note that the (i, j) pairs are not unique and may repeat. 1892 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1893 1894 This will generally be slow unless your operator is low-order. 1895 1896 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1897 1898 @param[in] op CeedOperator to assemble 1899 @param[out] num_entries Number of entries in coordinate nonzero pattern 1900 @param[out] rows Row number for each entry 1901 @param[out] cols Column number for each entry 1902 1903 @ref User 1904 **/ 1905 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1906 CeedInt num_suboperators, single_entries; 1907 CeedOperator *sub_operators; 1908 bool is_composite; 1909 CeedCall(CeedOperatorCheckReady(op)); 1910 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1911 1912 if (op->LinearAssembleSymbolic) { 1913 // Backend version 1914 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1915 return CEED_ERROR_SUCCESS; 1916 } else { 1917 // Operator fallback 1918 CeedOperator op_fallback; 1919 1920 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1921 if (op_fallback) { 1922 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1923 return CEED_ERROR_SUCCESS; 1924 } 1925 } 1926 1927 // Default interface implementation 1928 1929 // count entries and allocate rows, cols arrays 1930 *num_entries = 0; 1931 if (is_composite) { 1932 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1933 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1934 for (CeedInt k = 0; k < num_suboperators; ++k) { 1935 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1936 *num_entries += single_entries; 1937 } 1938 } else { 1939 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1940 *num_entries += single_entries; 1941 } 1942 CeedCall(CeedCalloc(*num_entries, rows)); 1943 CeedCall(CeedCalloc(*num_entries, cols)); 1944 1945 // assemble nonzero locations 1946 CeedInt offset = 0; 1947 if (is_composite) { 1948 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1949 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1950 for (CeedInt k = 0; k < num_suboperators; ++k) { 1951 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1952 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1953 offset += single_entries; 1954 } 1955 } else { 1956 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1957 } 1958 1959 return CEED_ERROR_SUCCESS; 1960 } 1961 1962 /** 1963 @brief Fully assemble the nonzero entries of a linear operator. 1964 1965 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1966 1967 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1968 matrix in entry (i, j). 1969 Note that the (i, j) pairs are not unique and may repeat. 1970 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1971 1972 This will generally be slow unless your operator is low-order. 1973 1974 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1975 1976 @param[in] op CeedOperator to assemble 1977 @param[out] values Values to assemble into matrix 1978 1979 @ref User 1980 **/ 1981 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1982 CeedInt num_suboperators, single_entries = 0; 1983 CeedOperator *sub_operators; 1984 bool is_composite; 1985 CeedCall(CeedOperatorCheckReady(op)); 1986 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1987 1988 // Early exit for empty operator 1989 if (!is_composite) { 1990 CeedInt num_elem = 0; 1991 1992 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1993 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1994 } 1995 1996 if (op->LinearAssemble) { 1997 // Backend version 1998 CeedCall(op->LinearAssemble(op, values)); 1999 return CEED_ERROR_SUCCESS; 2000 } else { 2001 // Operator fallback 2002 CeedOperator op_fallback; 2003 2004 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2005 if (op_fallback) { 2006 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2007 return CEED_ERROR_SUCCESS; 2008 } 2009 } 2010 2011 // Default interface implementation 2012 CeedInt offset = 0; 2013 CeedCall(CeedVectorSetValue(values, 0.0)); 2014 if (is_composite) { 2015 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2016 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2017 for (CeedInt k = 0; k < num_suboperators; k++) { 2018 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2019 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2020 offset += single_entries; 2021 } 2022 } else { 2023 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2024 } 2025 2026 return CEED_ERROR_SUCCESS; 2027 } 2028 2029 /** 2030 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2031 2032 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2033 2034 @param[in] op Composite CeedOperator 2035 @param[in] num_skip_indices Number of suboperators to skip 2036 @param[in] skip_indices Array of indices of suboperators to skip 2037 @param[out] mult Vector to store multiplicity (of size l_size) 2038 2039 @return An error code: 0 - success, otherwise - failure 2040 2041 @ref User 2042 **/ 2043 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2044 CeedCall(CeedOperatorCheckReady(op)); 2045 2046 Ceed ceed; 2047 CeedInt num_suboperators; 2048 CeedSize l_vec_len; 2049 CeedScalar *mult_array; 2050 CeedVector ones_l_vec; 2051 CeedElemRestriction elem_rstr; 2052 CeedOperator *sub_operators; 2053 2054 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2055 2056 // Zero mult vector 2057 CeedCall(CeedVectorSetValue(mult, 0.0)); 2058 2059 // Get suboperators 2060 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2061 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2062 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2063 2064 // Work vector 2065 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2066 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2067 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2068 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2069 2070 // Compute multiplicity across suboperators 2071 for (CeedInt i = 0; i < num_suboperators; i++) { 2072 const CeedScalar *sub_mult_array; 2073 CeedVector sub_mult_l_vec, ones_e_vec; 2074 2075 // -- Check for suboperator to skip 2076 for (CeedInt j = 0; j < num_skip_indices; j++) { 2077 if (skip_indices[j] == i) continue; 2078 } 2079 2080 // -- Sub operator multiplicity 2081 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2082 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2083 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2084 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2085 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2086 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2087 // ---- Flag every node present in the current suboperator 2088 for (CeedInt j = 0; j < l_vec_len; j++) { 2089 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2090 } 2091 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2092 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2093 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2094 } 2095 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2096 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2097 2098 return CEED_ERROR_SUCCESS; 2099 } 2100 2101 /** 2102 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2103 grid interpolation 2104 2105 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2106 2107 @param[in] op_fine Fine grid operator 2108 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2109 @param[in] rstr_coarse Coarse grid restriction 2110 @param[in] basis_coarse Coarse grid active vector basis 2111 @param[out] op_coarse Coarse grid operator 2112 @param[out] op_prolong Coarse to fine operator, or NULL 2113 @param[out] op_restrict Fine to coarse operator, or NULL 2114 2115 @return An error code: 0 - success, otherwise - failure 2116 2117 @ref User 2118 **/ 2119 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2120 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2121 CeedCall(CeedOperatorCheckReady(op_fine)); 2122 2123 // Build prolongation matrix, if required 2124 CeedBasis basis_c_to_f = NULL; 2125 if (op_prolong || op_restrict) { 2126 CeedBasis basis_fine; 2127 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2128 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2129 } 2130 2131 // Core code 2132 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2133 2134 return CEED_ERROR_SUCCESS; 2135 } 2136 2137 /** 2138 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2139 2140 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2141 2142 @param[in] op_fine Fine grid operator 2143 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2144 @param[in] rstr_coarse Coarse grid restriction 2145 @param[in] basis_coarse Coarse grid active vector basis 2146 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2147 @param[out] op_coarse Coarse grid operator 2148 @param[out] op_prolong Coarse to fine operator, or NULL 2149 @param[out] op_restrict Fine to coarse operator, or NULL 2150 2151 @return An error code: 0 - success, otherwise - failure 2152 2153 @ref User 2154 **/ 2155 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2156 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2157 CeedOperator *op_restrict) { 2158 CeedCall(CeedOperatorCheckReady(op_fine)); 2159 Ceed ceed; 2160 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2161 2162 // Check for compatible quadrature spaces 2163 CeedBasis basis_fine; 2164 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2165 CeedInt Q_f, Q_c; 2166 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2167 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2168 if (Q_f != Q_c) { 2169 // LCOV_EXCL_START 2170 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2171 // LCOV_EXCL_STOP 2172 } 2173 2174 // Create coarse to fine basis, if required 2175 CeedBasis basis_c_to_f = NULL; 2176 if (op_prolong || op_restrict) { 2177 // Check if interpolation matrix is provided 2178 if (!interp_c_to_f) { 2179 // LCOV_EXCL_START 2180 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2181 // LCOV_EXCL_STOP 2182 } 2183 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2184 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2185 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2186 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2187 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2188 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2189 CeedScalar *q_ref, *q_weight, *grad; 2190 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2191 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2192 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2193 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2194 CeedCall(CeedFree(&q_ref)); 2195 CeedCall(CeedFree(&q_weight)); 2196 CeedCall(CeedFree(&grad)); 2197 } 2198 2199 // Core code 2200 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2201 return CEED_ERROR_SUCCESS; 2202 } 2203 2204 /** 2205 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2206 2207 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2208 2209 @param[in] op_fine Fine grid operator 2210 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2211 @param[in] rstr_coarse Coarse grid restriction 2212 @param[in] basis_coarse Coarse grid active vector basis 2213 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2214 @param[out] op_coarse Coarse grid operator 2215 @param[out] op_prolong Coarse to fine operator, or NULL 2216 @param[out] op_restrict Fine to coarse operator, or NULL 2217 2218 @return An error code: 0 - success, otherwise - failure 2219 2220 @ref User 2221 **/ 2222 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2223 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2224 CeedOperator *op_restrict) { 2225 CeedCall(CeedOperatorCheckReady(op_fine)); 2226 Ceed ceed; 2227 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2228 2229 // Check for compatible quadrature spaces 2230 CeedBasis basis_fine; 2231 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2232 CeedInt Q_f, Q_c; 2233 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2234 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2235 if (Q_f != Q_c) { 2236 // LCOV_EXCL_START 2237 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2238 // LCOV_EXCL_STOP 2239 } 2240 2241 // Coarse to fine basis 2242 CeedBasis basis_c_to_f = NULL; 2243 if (op_prolong || op_restrict) { 2244 // Check if interpolation matrix is provided 2245 if (!interp_c_to_f) { 2246 // LCOV_EXCL_START 2247 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2248 // LCOV_EXCL_STOP 2249 } 2250 CeedElemTopology topo; 2251 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2252 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2253 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2254 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2255 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2256 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2257 CeedScalar *q_ref, *q_weight, *grad; 2258 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2259 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2260 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2261 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2262 CeedCall(CeedFree(&q_ref)); 2263 CeedCall(CeedFree(&q_weight)); 2264 CeedCall(CeedFree(&grad)); 2265 } 2266 2267 // Core code 2268 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2269 return CEED_ERROR_SUCCESS; 2270 } 2271 2272 /** 2273 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2274 2275 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2276 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2277 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2278 \hat S V\f$. 2279 The CeedOperator must be linear and non-composite. 2280 The associated CeedQFunction must therefore also be linear. 2281 2282 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2283 2284 @param[in] op CeedOperator to create element inverses 2285 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2286 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2287 2288 @return An error code: 0 - success, otherwise - failure 2289 2290 @ref User 2291 **/ 2292 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2293 CeedCall(CeedOperatorCheckReady(op)); 2294 2295 if (op->CreateFDMElementInverse) { 2296 // Backend version 2297 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2298 return CEED_ERROR_SUCCESS; 2299 } else { 2300 // Operator fallback 2301 CeedOperator op_fallback; 2302 2303 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2304 if (op_fallback) { 2305 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2306 return CEED_ERROR_SUCCESS; 2307 } 2308 } 2309 2310 // Default interface implementation 2311 Ceed ceed, ceed_parent; 2312 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2313 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2314 ceed_parent = ceed_parent ? ceed_parent : ceed; 2315 CeedQFunction qf; 2316 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2317 2318 // Determine active input basis 2319 bool interp = false, grad = false; 2320 CeedBasis basis = NULL; 2321 CeedElemRestriction rstr = NULL; 2322 CeedOperatorField *op_fields; 2323 CeedQFunctionField *qf_fields; 2324 CeedInt num_input_fields; 2325 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2326 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2327 for (CeedInt i = 0; i < num_input_fields; i++) { 2328 CeedVector vec; 2329 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2330 if (vec == CEED_VECTOR_ACTIVE) { 2331 CeedEvalMode eval_mode; 2332 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2333 interp = interp || eval_mode == CEED_EVAL_INTERP; 2334 grad = grad || eval_mode == CEED_EVAL_GRAD; 2335 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2336 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2337 } 2338 } 2339 if (!basis) { 2340 // LCOV_EXCL_START 2341 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2342 // LCOV_EXCL_STOP 2343 } 2344 CeedSize l_size = 1; 2345 CeedInt P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2346 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2347 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2348 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2349 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2350 CeedCall(CeedBasisGetDimension(basis, &dim)); 2351 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2352 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2353 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2354 2355 // Build and diagonalize 1D Mass and Laplacian 2356 bool tensor_basis; 2357 CeedCall(CeedBasisIsTensor(basis, &tensor_basis)); 2358 if (!tensor_basis) { 2359 // LCOV_EXCL_START 2360 return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2361 // LCOV_EXCL_STOP 2362 } 2363 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2364 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2365 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2366 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2367 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2368 CeedCall(CeedCalloc(P_1d, &lambda)); 2369 // -- Build matrices 2370 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2371 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2372 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2373 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2374 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2375 2376 // -- Diagonalize 2377 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2378 CeedCall(CeedFree(&mass)); 2379 CeedCall(CeedFree(&laplace)); 2380 for (CeedInt i = 0; i < P_1d; i++) { 2381 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2382 } 2383 CeedCall(CeedFree(&x)); 2384 2385 // Assemble QFunction 2386 CeedVector assembled; 2387 CeedElemRestriction rstr_qf; 2388 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2389 CeedInt layout[3]; 2390 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2391 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2392 CeedScalar max_norm = 0; 2393 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2394 2395 // Calculate element averages 2396 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2397 CeedScalar *elem_avg; 2398 const CeedScalar *assembled_array, *q_weight_array; 2399 CeedVector q_weight; 2400 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2401 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2402 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2403 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2404 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2405 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2406 for (CeedInt e = 0; e < num_elem; e++) { 2407 CeedInt count = 0; 2408 for (CeedInt q = 0; q < num_qpts; q++) { 2409 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2410 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2411 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2412 count++; 2413 } 2414 } 2415 } 2416 if (count) { 2417 elem_avg[e] /= count; 2418 } else { 2419 elem_avg[e] = 1.0; 2420 } 2421 } 2422 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2423 CeedCall(CeedVectorDestroy(&assembled)); 2424 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2425 CeedCall(CeedVectorDestroy(&q_weight)); 2426 2427 // Build FDM diagonal 2428 CeedVector q_data; 2429 CeedScalar *q_data_array, *fdm_diagonal; 2430 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2431 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2432 for (CeedInt c = 0; c < num_comp; c++) { 2433 for (CeedInt n = 0; n < num_nodes; n++) { 2434 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2435 if (grad) { 2436 for (CeedInt d = 0; d < dim; d++) { 2437 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2438 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2439 } 2440 } 2441 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2442 } 2443 } 2444 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2445 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2446 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2447 for (CeedInt e = 0; e < num_elem; e++) { 2448 for (CeedInt c = 0; c < num_comp; c++) { 2449 for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2450 } 2451 } 2452 CeedCall(CeedFree(&elem_avg)); 2453 CeedCall(CeedFree(&fdm_diagonal)); 2454 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2455 2456 // Setup FDM operator 2457 // -- Basis 2458 CeedBasis fdm_basis; 2459 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2460 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2461 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2462 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2463 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2464 CeedCall(CeedFree(&fdm_interp)); 2465 CeedCall(CeedFree(&grad_dummy)); 2466 CeedCall(CeedFree(&q_ref_dummy)); 2467 CeedCall(CeedFree(&q_weight_dummy)); 2468 CeedCall(CeedFree(&lambda)); 2469 2470 // -- Restriction 2471 CeedElemRestriction rstr_qd_i; 2472 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2473 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2474 // -- QFunction 2475 CeedQFunction qf_fdm; 2476 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2477 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2478 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2479 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2480 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2481 // -- QFunction context 2482 CeedInt *num_comp_data; 2483 CeedCall(CeedCalloc(1, &num_comp_data)); 2484 num_comp_data[0] = num_comp; 2485 CeedQFunctionContext ctx_fdm; 2486 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2487 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2488 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2489 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2490 // -- Operator 2491 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2492 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2493 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2494 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2495 2496 // Cleanup 2497 CeedCall(CeedVectorDestroy(&q_data)); 2498 CeedCall(CeedBasisDestroy(&fdm_basis)); 2499 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2500 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2501 2502 return CEED_ERROR_SUCCESS; 2503 } 2504 2505 /// @} 2506