1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <assert.h> 9 #include <ceed-impl.h> 10 #include <ceed.h> 11 #include <ceed/backend.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] basis CeedBasis from which to get the basis matrix 184 @param[in] eval_mode Current basis evaluation mode 185 @param[in] identity Pointer to identity matrix 186 @param[out] basis_ptr Basis pointer to set 187 188 @ref Developer 189 **/ 190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 191 switch (eval_mode) { 192 case CEED_EVAL_NONE: 193 *basis_ptr = identity; 194 break; 195 case CEED_EVAL_INTERP: 196 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 197 break; 198 case CEED_EVAL_GRAD: 199 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 200 break; 201 case CEED_EVAL_DIV: 202 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 203 break; 204 case CEED_EVAL_CURL: 205 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 206 break; 207 case CEED_EVAL_WEIGHT: 208 break; // Caught by QF Assembly 209 } 210 assert(*basis_ptr != NULL); 211 212 return CEED_ERROR_SUCCESS; 213 } 214 215 /** 216 @brief Create point block restriction for active operator field 217 218 @param[in] rstr Original CeedElemRestriction for active field 219 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 220 221 @return An error code: 0 - success, otherwise - failure 222 223 @ref Developer 224 **/ 225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 226 Ceed ceed; 227 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 228 const CeedInt *offsets; 229 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 230 231 // Expand offsets 232 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 233 CeedSize l_size; 234 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 235 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 236 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 237 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 238 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 239 CeedInt shift = num_comp; 240 if (comp_stride != 1) shift *= num_comp; 241 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 242 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 243 pointblock_offsets[i] = offsets[i] * shift; 244 } 245 246 // Create new restriction 247 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 248 pointblock_offsets, pointblock_rstr)); 249 250 // Cleanup 251 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 252 253 return CEED_ERROR_SUCCESS; 254 } 255 256 /** 257 @brief Core logic for assembling operator diagonal or point block diagonal 258 259 @param[in] op CeedOperator to assemble point block diagonal 260 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 261 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 262 @param[out] assembled CeedVector to store assembled diagonal 263 264 @return An error code: 0 - success, otherwise - failure 265 266 @ref Developer 267 **/ 268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 269 Ceed ceed; 270 CeedCall(CeedOperatorGetCeed(op, &ceed)); 271 272 // Assemble QFunction 273 CeedQFunction qf; 274 const CeedScalar *assembled_qf_array; 275 CeedVector assembled_qf; 276 CeedElemRestriction assembled_elem_rstr; 277 CeedInt num_input_fields, num_output_fields; 278 CeedInt layout[3]; 279 280 CeedCall(CeedOperatorGetQFunction(op, &qf)); 281 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 282 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 283 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 284 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 285 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 286 287 // Get assembly data 288 CeedOperatorAssemblyData data; 289 const CeedEvalMode **eval_modes_in, **eval_modes_out; 290 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 291 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 292 CeedBasis *active_bases; 293 CeedElemRestriction *active_elem_rstrs; 294 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 295 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 296 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 297 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 298 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 299 300 // Loop over all active bases 301 for (CeedInt b = 0; b < num_active_bases; b++) { 302 // Assemble point block diagonal restriction, if needed 303 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 304 305 if (is_pointblock) { 306 CeedElemRestriction point_block_elem_rstr; 307 308 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 309 diag_elem_rstr = point_block_elem_rstr; 310 } 311 312 // Create diagonal vector 313 CeedVector elem_diag; 314 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 315 316 // Assemble element operator diagonals 317 CeedScalar *elem_diag_array; 318 CeedInt num_elem, num_nodes, num_qpts, num_components; 319 320 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 321 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 322 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 323 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 324 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 325 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 326 327 // Construct identity matrix for basis if required 328 bool has_eval_none = false; 329 CeedScalar *identity = NULL; 330 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 331 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 332 } 333 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 334 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 335 } 336 if (has_eval_none) { 337 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 338 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 339 } 340 341 // Compute the diagonal of B^T D B 342 // Each element 343 for (CeedInt e = 0; e < num_elem; e++) { 344 // Each basis eval mode pair 345 CeedInt d_out = 0, q_comp_out; 346 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 347 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 348 const CeedScalar *B_t = NULL; 349 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 350 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 351 if (q_comp_out > 1) { 352 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 353 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 354 } 355 eval_mode_out_prev = eval_modes_out[b][e_out]; 356 357 CeedInt d_in = 0, q_comp_in; 358 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 359 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 360 const CeedScalar *B = NULL; 361 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 362 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 363 if (q_comp_in > 1) { 364 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 365 else B = &B[(++d_in) * num_qpts * num_nodes]; 366 } 367 eval_mode_in_prev = eval_modes_in[b][e_in]; 368 369 // Each component 370 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 371 // Each qpt/node pair 372 for (CeedInt q = 0; q < num_qpts; q++) { 373 if (is_pointblock) { 374 // Point Block Diagonal 375 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 376 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 377 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 378 for (CeedInt n = 0; n < num_nodes; n++) { 379 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 380 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 381 } 382 } 383 } else { 384 // Diagonal Only 385 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 386 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 387 for (CeedInt n = 0; n < num_nodes; n++) { 388 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 389 } 390 } 391 } 392 } 393 } 394 } 395 } 396 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 397 398 // Assemble local operator diagonal 399 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 400 401 // Cleanup 402 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 403 CeedCall(CeedVectorDestroy(&elem_diag)); 404 CeedCall(CeedFree(&identity)); 405 } 406 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 407 CeedCall(CeedVectorDestroy(&assembled_qf)); 408 409 return CEED_ERROR_SUCCESS; 410 } 411 412 /** 413 @brief Core logic for assembling composite operator diagonal 414 415 @param[in] op CeedOperator to assemble point block diagonal 416 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 417 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 418 @param[out] assembled CeedVector to store assembled diagonal 419 420 @return An error code: 0 - success, otherwise - failure 421 422 @ref Developer 423 **/ 424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 CeedInt num_sub; 427 CeedOperator *suboperators; 428 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 429 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 430 for (CeedInt i = 0; i < num_sub; i++) { 431 if (is_pointblock) { 432 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 433 } else { 434 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 435 } 436 } 437 return CEED_ERROR_SUCCESS; 438 } 439 440 /** 441 @brief Build nonzero pattern for non-composite operator 442 443 Users should generally use CeedOperatorLinearAssembleSymbolic() 444 445 @param[in] op CeedOperator to assemble nonzero pattern 446 @param[in] offset Offset for number of entries 447 @param[out] rows Row number for each entry 448 @param[out] cols Column number for each entry 449 450 @return An error code: 0 - success, otherwise - failure 451 452 @ref Developer 453 **/ 454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 455 Ceed ceed; 456 bool is_composite; 457 CeedCall(CeedOperatorGetCeed(op, &ceed)); 458 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 459 460 if (is_composite) { 461 // LCOV_EXCL_START 462 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 463 // LCOV_EXCL_STOP 464 } 465 466 CeedSize num_nodes; 467 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 468 CeedElemRestriction rstr_in; 469 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 470 CeedInt num_elem, elem_size, num_comp; 471 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 472 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 473 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 474 CeedInt layout_er[3]; 475 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 476 477 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 478 479 // Determine elem_dof relation 480 CeedVector index_vec; 481 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 482 CeedScalar *array; 483 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 484 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 485 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 486 CeedVector elem_dof; 487 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 488 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 489 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 490 const CeedScalar *elem_dof_a; 491 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 492 CeedCall(CeedVectorDestroy(&index_vec)); 493 494 // Determine i, j locations for element matrices 495 CeedInt count = 0; 496 for (CeedInt e = 0; e < num_elem; e++) { 497 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 498 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 499 for (CeedInt i = 0; i < elem_size; i++) { 500 for (CeedInt j = 0; j < elem_size; j++) { 501 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 502 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 503 504 const CeedInt row = elem_dof_a[elem_dof_index_row]; 505 const CeedInt col = elem_dof_a[elem_dof_index_col]; 506 507 rows[offset + count] = row; 508 cols[offset + count] = col; 509 count++; 510 } 511 } 512 } 513 } 514 } 515 if (count != local_num_entries) { 516 // LCOV_EXCL_START 517 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 // LCOV_EXCL_STOP 519 } 520 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 521 CeedCall(CeedVectorDestroy(&elem_dof)); 522 523 return CEED_ERROR_SUCCESS; 524 } 525 526 /** 527 @brief Assemble nonzero entries for non-composite operator 528 529 Users should generally use CeedOperatorLinearAssemble() 530 531 @param[in] op CeedOperator to assemble 532 @param[in] offset Offset for number of entries 533 @param[out] values Values to assemble into matrix 534 535 @return An error code: 0 - success, otherwise - failure 536 537 @ref Developer 538 **/ 539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 540 Ceed ceed; 541 bool is_composite; 542 CeedCall(CeedOperatorGetCeed(op, &ceed)); 543 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 544 545 if (is_composite) { 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 548 // LCOV_EXCL_STOP 549 } 550 551 // Early exit for empty operator 552 { 553 CeedInt num_elem = 0; 554 555 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 556 if (num_elem == 0) return CEED_ERROR_SUCCESS; 557 } 558 559 if (op->LinearAssembleSingle) { 560 // Backend version 561 CeedCall(op->LinearAssembleSingle(op, offset, values)); 562 return CEED_ERROR_SUCCESS; 563 } else { 564 // Operator fallback 565 CeedOperator op_fallback; 566 567 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 568 if (op_fallback) { 569 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 570 return CEED_ERROR_SUCCESS; 571 } 572 } 573 574 // Assemble QFunction 575 CeedQFunction qf; 576 CeedCall(CeedOperatorGetQFunction(op, &qf)); 577 CeedVector assembled_qf; 578 CeedElemRestriction rstr_q; 579 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 580 CeedSize qf_length; 581 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 582 583 CeedInt num_input_fields, num_output_fields; 584 CeedOperatorField *input_fields; 585 CeedOperatorField *output_fields; 586 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 587 588 // Get assembly data 589 CeedOperatorAssemblyData data; 590 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 591 const CeedEvalMode **eval_modes_in, **eval_modes_out; 592 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 593 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 594 &eval_modes_out, NULL, NULL)); 595 CeedBasis *bases; 596 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 597 CeedBasis basis_in = bases[0]; 598 599 if (num_active_bases > 1) { 600 // LCOV_EXCL_START 601 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 602 // LCOV_EXCL_STOP 603 } 604 if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) { 605 // LCOV_EXCL_START 606 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 607 // LCOV_EXCL_STOP 608 } 609 610 CeedElemRestriction active_rstr; 611 CeedInt num_elem, elem_size, num_qpts, num_comp; 612 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 613 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 614 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 615 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 616 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 617 618 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 619 620 // loop over elements and put in data structure 621 const CeedScalar *assembled_qf_array; 622 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 623 624 CeedInt layout_qf[3]; 625 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 626 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 627 628 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 629 const CeedScalar **B_mats_in, **B_mats_out; 630 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 631 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 632 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 633 CeedScalar elem_mat[elem_size * elem_size]; 634 CeedInt count = 0; 635 CeedScalar *vals; 636 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 637 for (CeedInt e = 0; e < num_elem; e++) { 638 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 639 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 640 // Compute B^T*D 641 for (CeedInt n = 0; n < elem_size; n++) { 642 for (CeedInt q = 0; q < num_qpts; q++) { 643 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 644 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 645 CeedScalar sum = 0.0; 646 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 647 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 648 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 649 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 650 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 651 } 652 BTD_mat[btd_index] = sum; 653 } 654 } 655 } 656 // form element matrix itself (for each block component) 657 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 658 659 // put element matrix in coordinate data structure 660 for (CeedInt i = 0; i < elem_size; i++) { 661 for (CeedInt j = 0; j < elem_size; j++) { 662 vals[offset + count] = elem_mat[i * elem_size + j]; 663 count++; 664 } 665 } 666 } 667 } 668 } 669 if (count != local_num_entries) { 670 // LCOV_EXCL_START 671 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 672 // LCOV_EXCL_STOP 673 } 674 CeedCall(CeedVectorRestoreArray(values, &vals)); 675 676 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 677 CeedCall(CeedVectorDestroy(&assembled_qf)); 678 679 return CEED_ERROR_SUCCESS; 680 } 681 682 /** 683 @brief Count number of entries for assembled CeedOperator 684 685 @param[in] op CeedOperator to assemble 686 @param[out] num_entries Number of entries in assembled representation 687 688 @return An error code: 0 - success, otherwise - failure 689 690 @ref Utility 691 **/ 692 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 693 bool is_composite; 694 CeedElemRestriction rstr; 695 CeedInt num_elem, elem_size, num_comp; 696 697 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 698 if (is_composite) { 699 // LCOV_EXCL_START 700 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 701 // LCOV_EXCL_STOP 702 } 703 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 704 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 705 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 706 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 707 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 708 709 return CEED_ERROR_SUCCESS; 710 } 711 712 /** 713 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 714 715 @param[in] op_fine Fine grid operator 716 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 717 @param[in] rstr_coarse Coarse grid restriction 718 @param[in] basis_coarse Coarse grid active vector basis 719 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 720 @param[out] op_coarse Coarse grid operator 721 @param[out] op_prolong Coarse to fine operator, or NULL 722 @param[out] op_restrict Fine to coarse operator, or NULL 723 724 @return An error code: 0 - success, otherwise - failure 725 726 @ref Developer 727 **/ 728 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 729 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 730 Ceed ceed; 731 CeedVector mult_vec = NULL; 732 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 733 734 // Check for composite operator 735 bool is_composite; 736 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 737 if (is_composite) { 738 // LCOV_EXCL_START 739 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 740 // LCOV_EXCL_STOP 741 } 742 743 // Coarse Grid 744 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 745 CeedElemRestriction rstr_fine = NULL; 746 // -- Clone input fields 747 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 748 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 749 rstr_fine = op_fine->input_fields[i]->elem_rstr; 750 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 751 } else { 752 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 753 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 754 } 755 } 756 // -- Clone output fields 757 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 758 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 759 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 760 } else { 761 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 762 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 763 } 764 } 765 // -- Clone QFunctionAssemblyData 766 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 767 768 // Multiplicity vector 769 if (op_restrict || op_prolong) { 770 CeedVector mult_e_vec; 771 772 if (!p_mult_fine) { 773 // LCOV_EXCL_START 774 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 775 // LCOV_EXCL_STOP 776 } 777 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 778 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 779 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 780 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 781 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 782 CeedCall(CeedVectorDestroy(&mult_e_vec)); 783 CeedCall(CeedVectorReciprocal(mult_vec)); 784 } 785 786 // Clone name 787 bool has_name = op_fine->name; 788 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 789 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 790 791 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 792 if ((op_restrict || op_prolong) && !basis_c_to_f) { 793 // LCOV_EXCL_START 794 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis"); 795 // LCOV_EXCL_STOP 796 } 797 798 // Restriction/Prolongation Operators 799 CeedInt num_comp; 800 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 801 802 // Restriction 803 if (op_restrict) { 804 CeedInt *num_comp_r_data; 805 CeedQFunction qf_restrict; 806 CeedQFunctionContext ctx_r; 807 808 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 809 CeedCall(CeedCalloc(1, &num_comp_r_data)); 810 num_comp_r_data[0] = num_comp; 811 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 812 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 813 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 814 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 815 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 816 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 817 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 818 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 819 820 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 821 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 822 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 823 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 824 825 // Set name 826 char *restriction_name; 827 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 828 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 829 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 830 CeedCall(CeedFree(&restriction_name)); 831 832 // Check 833 CeedCall(CeedOperatorCheckReady(*op_restrict)); 834 835 // Cleanup 836 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 837 } 838 839 // Prolongation 840 if (op_prolong) { 841 CeedInt *num_comp_p_data; 842 CeedQFunction qf_prolong; 843 CeedQFunctionContext ctx_p; 844 845 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 846 CeedCall(CeedCalloc(1, &num_comp_p_data)); 847 num_comp_p_data[0] = num_comp; 848 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 849 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 850 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 851 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 852 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 853 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 854 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 855 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 856 857 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 858 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 859 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 860 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 861 862 // Set name 863 char *prolongation_name; 864 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 865 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 866 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 867 CeedCall(CeedFree(&prolongation_name)); 868 869 // Check 870 CeedCall(CeedOperatorCheckReady(*op_prolong)); 871 872 // Cleanup 873 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 874 } 875 876 // Check 877 CeedCall(CeedOperatorCheckReady(*op_coarse)); 878 879 // Cleanup 880 CeedCall(CeedVectorDestroy(&mult_vec)); 881 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 882 883 return CEED_ERROR_SUCCESS; 884 } 885 886 /** 887 @brief Build 1D mass matrix and Laplacian with perturbation 888 889 @param[in] interp_1d Interpolation matrix in one dimension 890 @param[in] grad_1d Gradient matrix in one dimension 891 @param[in] q_weight_1d Quadrature weights in one dimension 892 @param[in] P_1d Number of basis nodes in one dimension 893 @param[in] Q_1d Number of quadrature points in one dimension 894 @param[in] dim Dimension of basis 895 @param[out] mass Assembled mass matrix in one dimension 896 @param[out] laplace Assembled perturbed Laplacian in one dimension 897 898 @return An error code: 0 - success, otherwise - failure 899 900 @ref Developer 901 **/ 902 CeedPragmaOptimizeOff 903 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 904 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 905 for (CeedInt i = 0; i < P_1d; i++) { 906 for (CeedInt j = 0; j < P_1d; j++) { 907 CeedScalar sum = 0.0; 908 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 909 mass[i + j * P_1d] = sum; 910 } 911 } 912 // -- Laplacian 913 for (CeedInt i = 0; i < P_1d; i++) { 914 for (CeedInt j = 0; j < P_1d; j++) { 915 CeedScalar sum = 0.0; 916 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 917 laplace[i + j * P_1d] = sum; 918 } 919 } 920 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 921 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 922 return CEED_ERROR_SUCCESS; 923 } 924 CeedPragmaOptimizeOn 925 926 /// @} 927 928 /// ---------------------------------------------------------------------------- 929 /// CeedOperator Backend API 930 /// ---------------------------------------------------------------------------- 931 /// @addtogroup CeedOperatorBackend 932 /// @{ 933 934 /** 935 @brief Create object holding CeedQFunction assembly data for CeedOperator 936 937 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 938 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 939 940 @return An error code: 0 - success, otherwise - failure 941 942 @ref Backend 943 **/ 944 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 945 CeedCall(CeedCalloc(1, data)); 946 (*data)->ref_count = 1; 947 (*data)->ceed = ceed; 948 CeedCall(CeedReference(ceed)); 949 950 return CEED_ERROR_SUCCESS; 951 } 952 953 /** 954 @brief Increment the reference counter for a CeedQFunctionAssemblyData 955 956 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 957 958 @return An error code: 0 - success, otherwise - failure 959 960 @ref Backend 961 **/ 962 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 963 data->ref_count++; 964 return CEED_ERROR_SUCCESS; 965 } 966 967 /** 968 @brief Set re-use of CeedQFunctionAssemblyData 969 970 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 971 @param[in] reuse_data Boolean flag indicating data re-use 972 973 @return An error code: 0 - success, otherwise - failure 974 975 @ref Backend 976 **/ 977 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 978 data->reuse_data = reuse_data; 979 data->needs_data_update = true; 980 return CEED_ERROR_SUCCESS; 981 } 982 983 /** 984 @brief Mark QFunctionAssemblyData as stale 985 986 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 987 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 988 989 @return An error code: 0 - success, otherwise - failure 990 991 @ref Backend 992 **/ 993 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 994 data->needs_data_update = needs_data_update; 995 return CEED_ERROR_SUCCESS; 996 } 997 998 /** 999 @brief Determine if QFunctionAssemblyData needs update 1000 1001 @param[in] data CeedQFunctionAssemblyData to mark as stale 1002 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1003 1004 @return An error code: 0 - success, otherwise - failure 1005 1006 @ref Backend 1007 **/ 1008 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1009 *is_update_needed = !data->reuse_data || data->needs_data_update; 1010 return CEED_ERROR_SUCCESS; 1011 } 1012 1013 /** 1014 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1015 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1016 1017 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1018 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1019 CeedQFunctionAssemblyData. 1020 1021 @param[in] data CeedQFunctionAssemblyData to copy reference to 1022 @param[in,out] data_copy Variable to store copied reference 1023 1024 @return An error code: 0 - success, otherwise - failure 1025 1026 @ref Backend 1027 **/ 1028 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1029 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1030 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1031 *data_copy = data; 1032 return CEED_ERROR_SUCCESS; 1033 } 1034 1035 /** 1036 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1037 1038 @param[in] data CeedQFunctionAssemblyData to retrieve status 1039 @param[out] is_setup Boolean flag for setup status 1040 1041 @return An error code: 0 - success, otherwise - failure 1042 1043 @ref Backend 1044 **/ 1045 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1046 *is_setup = data->is_setup; 1047 return CEED_ERROR_SUCCESS; 1048 } 1049 1050 /** 1051 @brief Set internal objects for CeedQFunctionAssemblyData 1052 1053 @param[in,out] data CeedQFunctionAssemblyData to set objects 1054 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1055 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1056 1057 @return An error code: 0 - success, otherwise - failure 1058 1059 @ref Backend 1060 **/ 1061 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1062 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1063 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1064 1065 data->is_setup = true; 1066 return CEED_ERROR_SUCCESS; 1067 } 1068 1069 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1070 if (!data->is_setup) { 1071 // LCOV_EXCL_START 1072 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1073 // LCOV_EXCL_STOP 1074 } 1075 1076 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1077 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1078 1079 return CEED_ERROR_SUCCESS; 1080 } 1081 1082 /** 1083 @brief Destroy CeedQFunctionAssemblyData 1084 1085 @param[in,out] data CeedQFunctionAssemblyData to destroy 1086 1087 @return An error code: 0 - success, otherwise - failure 1088 1089 @ref Backend 1090 **/ 1091 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1092 if (!*data || --(*data)->ref_count > 0) { 1093 *data = NULL; 1094 return CEED_ERROR_SUCCESS; 1095 } 1096 CeedCall(CeedDestroy(&(*data)->ceed)); 1097 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1098 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1099 1100 CeedCall(CeedFree(data)); 1101 return CEED_ERROR_SUCCESS; 1102 } 1103 1104 /** 1105 @brief Get CeedOperatorAssemblyData 1106 1107 @param[in] op CeedOperator to assemble 1108 @param[out] data CeedQFunctionAssemblyData 1109 1110 @return An error code: 0 - success, otherwise - failure 1111 1112 @ref Backend 1113 **/ 1114 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1115 if (!op->op_assembled) { 1116 CeedOperatorAssemblyData data; 1117 1118 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1119 op->op_assembled = data; 1120 } 1121 *data = op->op_assembled; 1122 1123 return CEED_ERROR_SUCCESS; 1124 } 1125 1126 /** 1127 @brief Create object holding CeedOperator assembly data. 1128 1129 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1130 An array with references to the corresponding active CeedElemRestrictions is also stored. 1131 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1132 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1133 CeedEvalMode. 1134 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1135 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1136 1137 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1138 @param[in] op CeedOperator to be assembled 1139 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1140 1141 @return An error code: 0 - success, otherwise - failure 1142 1143 @ref Backend 1144 **/ 1145 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1146 CeedInt num_active_bases = 0; 1147 1148 // Allocate 1149 CeedCall(CeedCalloc(1, data)); 1150 (*data)->ceed = ceed; 1151 CeedCall(CeedReference(ceed)); 1152 1153 // Build OperatorAssembly data 1154 CeedQFunction qf; 1155 CeedQFunctionField *qf_fields; 1156 CeedOperatorField *op_fields; 1157 CeedInt num_input_fields; 1158 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1159 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1160 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1161 1162 // Determine active input basis 1163 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1164 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1165 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1166 for (CeedInt i = 0; i < num_input_fields; i++) { 1167 CeedVector vec; 1168 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1169 if (vec == CEED_VECTOR_ACTIVE) { 1170 CeedBasis basis_in = NULL; 1171 CeedEvalMode eval_mode; 1172 CeedInt index = -1, dim, num_comp, q_comp; 1173 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1174 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1175 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1176 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1177 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1178 for (CeedInt i = 0; i < num_active_bases; i++) { 1179 if ((*data)->active_bases[i] == basis_in) index = i; 1180 } 1181 if (index == -1) { 1182 CeedElemRestriction elem_rstr_in; 1183 index = num_active_bases; 1184 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1185 (*data)->active_bases[num_active_bases] = NULL; 1186 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1187 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1188 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1189 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1190 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1191 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1192 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1193 num_eval_modes_in[index] = 0; 1194 num_eval_modes_out[index] = 0; 1195 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1196 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1197 eval_modes_in[index] = NULL; 1198 eval_modes_out[index] = NULL; 1199 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1200 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1201 eval_mode_offsets_in[index] = NULL; 1202 eval_mode_offsets_out[index] = NULL; 1203 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1204 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1205 (*data)->assembled_bases_in[index] = NULL; 1206 (*data)->assembled_bases_out[index] = NULL; 1207 num_active_bases++; 1208 } 1209 if (eval_mode != CEED_EVAL_WEIGHT) { 1210 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1211 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1212 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1213 for (CeedInt d = 0; d < q_comp; d++) { 1214 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1215 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1216 offset += num_comp; 1217 } 1218 num_eval_modes_in[index] += q_comp; 1219 } 1220 } 1221 } 1222 (*data)->num_eval_modes_in = num_eval_modes_in; 1223 (*data)->eval_modes_in = eval_modes_in; 1224 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1225 1226 // Determine active output basis 1227 CeedInt num_output_fields; 1228 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1229 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1230 offset = 0; 1231 for (CeedInt i = 0; i < num_output_fields; i++) { 1232 CeedVector vec; 1233 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1234 if (vec == CEED_VECTOR_ACTIVE) { 1235 CeedBasis basis_out = NULL; 1236 CeedEvalMode eval_mode; 1237 CeedInt index = -1, dim, num_comp, q_comp; 1238 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1239 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1240 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1241 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1242 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1243 for (CeedInt i = 0; i < num_active_bases; i++) { 1244 if ((*data)->active_bases[i] == basis_out) index = i; 1245 } 1246 if (index == -1) { 1247 CeedElemRestriction elem_rstr_out; 1248 1249 index = num_active_bases; 1250 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1251 (*data)->active_bases[num_active_bases] = NULL; 1252 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1253 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1254 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1255 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1256 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1257 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1258 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1259 num_eval_modes_in[index] = 0; 1260 num_eval_modes_out[index] = 0; 1261 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1262 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1263 eval_modes_in[index] = NULL; 1264 eval_modes_out[index] = NULL; 1265 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1266 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1267 eval_mode_offsets_in[index] = NULL; 1268 eval_mode_offsets_out[index] = NULL; 1269 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1270 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1271 (*data)->assembled_bases_in[index] = NULL; 1272 (*data)->assembled_bases_out[index] = NULL; 1273 num_active_bases++; 1274 } 1275 if (eval_mode != CEED_EVAL_WEIGHT) { 1276 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1277 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1278 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1279 for (CeedInt d = 0; d < q_comp; d++) { 1280 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1281 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1282 offset += num_comp; 1283 } 1284 num_eval_modes_out[index] += q_comp; 1285 } 1286 } 1287 } 1288 (*data)->num_output_components = offset; 1289 (*data)->num_eval_modes_out = num_eval_modes_out; 1290 (*data)->eval_modes_out = eval_modes_out; 1291 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1292 (*data)->num_active_bases = num_active_bases; 1293 1294 return CEED_ERROR_SUCCESS; 1295 } 1296 1297 /** 1298 @brief Get CeedOperator CeedEvalModes for assembly. 1299 1300 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1301 1302 @param[in] data CeedOperatorAssemblyData 1303 @param[out] num_active_bases Total number of active bases 1304 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1305 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1306 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1307 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1308 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1309 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1310 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1311 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1312 including contributions of all active bases 1313 1314 @return An error code: 0 - success, otherwise - failure 1315 1316 1317 @ref Backend 1318 **/ 1319 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1320 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1321 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1322 if (num_active_bases) *num_active_bases = data->num_active_bases; 1323 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1324 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1325 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1326 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1327 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1328 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1329 if (num_output_components) *num_output_components = data->num_output_components; 1330 1331 return CEED_ERROR_SUCCESS; 1332 } 1333 1334 /** 1335 @brief Get CeedOperator CeedBasis data for assembly. 1336 1337 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1338 1339 @param[in] data CeedOperatorAssemblyData 1340 @param[out] num_active_bases Number of active bases, or NULL 1341 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1342 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1343 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1344 1345 @return An error code: 0 - success, otherwise - failure 1346 1347 @ref Backend 1348 **/ 1349 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1350 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1351 // Assemble B_in, B_out if needed 1352 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1353 CeedInt num_qpts; 1354 1355 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1356 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1357 CeedInt num_nodes; 1358 CeedScalar *B_in = NULL, *identity = NULL; 1359 bool has_eval_none = false; 1360 1361 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1362 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1363 1364 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1365 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1366 } 1367 if (has_eval_none) { 1368 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1369 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1370 identity[i * num_nodes + i] = 1.0; 1371 } 1372 } 1373 1374 for (CeedInt q = 0; q < num_qpts; q++) { 1375 for (CeedInt n = 0; n < num_nodes; n++) { 1376 CeedInt d_in = 0, q_comp_in; 1377 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1378 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1379 const CeedInt qq = data->num_eval_modes_in[b] * q; 1380 const CeedScalar *B = NULL; 1381 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1382 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1383 if (q_comp_in > 1) { 1384 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1385 else B = &B[(++d_in) * num_qpts * num_nodes]; 1386 } 1387 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1388 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1389 } 1390 } 1391 } 1392 if (identity) CeedCall(CeedFree(identity)); 1393 data->assembled_bases_in[b] = B_in; 1394 } 1395 } 1396 1397 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1398 CeedInt num_qpts; 1399 1400 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1401 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1402 CeedInt num_nodes; 1403 bool has_eval_none = false; 1404 CeedScalar *B_out = NULL, *identity = NULL; 1405 1406 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1407 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1408 1409 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1410 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1411 } 1412 if (has_eval_none) { 1413 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1414 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1415 identity[i * num_nodes + i] = 1.0; 1416 } 1417 } 1418 1419 for (CeedInt q = 0; q < num_qpts; q++) { 1420 for (CeedInt n = 0; n < num_nodes; n++) { 1421 CeedInt d_out = 0, q_comp_out; 1422 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1423 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1424 const CeedInt qq = data->num_eval_modes_out[b] * q; 1425 const CeedScalar *B = NULL; 1426 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1427 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1428 if (q_comp_out > 1) { 1429 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1430 else B = &B[(++d_out) * num_qpts * num_nodes]; 1431 } 1432 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1433 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1434 } 1435 } 1436 } 1437 if (identity) CeedCall(CeedFree(identity)); 1438 data->assembled_bases_out[b] = B_out; 1439 } 1440 } 1441 1442 // Pass out assembled data 1443 if (active_bases) *active_bases = data->active_bases; 1444 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1445 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1446 1447 return CEED_ERROR_SUCCESS; 1448 } 1449 1450 /** 1451 @brief Get CeedOperator CeedBasis data for assembly. 1452 1453 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1454 1455 @param[in] data CeedOperatorAssemblyData 1456 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1457 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1458 1459 @return An error code: 0 - success, otherwise - failure 1460 1461 @ref Backend 1462 **/ 1463 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1464 CeedElemRestriction **active_elem_rstrs) { 1465 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1466 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1467 1468 return CEED_ERROR_SUCCESS; 1469 } 1470 1471 /** 1472 @brief Destroy CeedOperatorAssemblyData 1473 1474 @param[in,out] data CeedOperatorAssemblyData to destroy 1475 1476 @return An error code: 0 - success, otherwise - failure 1477 1478 @ref Backend 1479 **/ 1480 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1481 if (!*data) { 1482 *data = NULL; 1483 return CEED_ERROR_SUCCESS; 1484 } 1485 CeedCall(CeedDestroy(&(*data)->ceed)); 1486 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1487 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1488 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1489 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1490 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1491 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1492 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1493 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1494 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1495 } 1496 CeedCall(CeedFree(&(*data)->active_bases)); 1497 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1498 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1499 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1500 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1501 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1502 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1503 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1504 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1505 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1506 1507 CeedCall(CeedFree(data)); 1508 return CEED_ERROR_SUCCESS; 1509 } 1510 1511 /// @} 1512 1513 /// ---------------------------------------------------------------------------- 1514 /// CeedOperator Public API 1515 /// ---------------------------------------------------------------------------- 1516 /// @addtogroup CeedOperatorUser 1517 /// @{ 1518 1519 /** 1520 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1521 1522 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1523 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1524 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1525 1526 Inputs and outputs are in the order provided by the 1527 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, 1528 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] 1529 and producing the output [dv_0, dv_1, v]. 1530 1531 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1532 1533 @param[in] op CeedOperator to assemble CeedQFunction 1534 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1535 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1536 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1537 1538 @return An error code: 0 - success, otherwise - failure 1539 1540 @ref User 1541 **/ 1542 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1543 CeedCall(CeedOperatorCheckReady(op)); 1544 1545 if (op->LinearAssembleQFunction) { 1546 // Backend version 1547 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1548 } else { 1549 // Operator fallback 1550 CeedOperator op_fallback; 1551 1552 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1553 if (op_fallback) { 1554 CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1555 } else { 1556 // LCOV_EXCL_START 1557 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1558 // LCOV_EXCL_STOP 1559 } 1560 } 1561 return CEED_ERROR_SUCCESS; 1562 } 1563 1564 /** 1565 @brief Assemble CeedQFunction and store result internally. 1566 Return copied references of stored data to the caller. 1567 Caller is responsible for ownership and destruction of the copied references. 1568 See also @ref CeedOperatorLinearAssembleQFunction 1569 1570 @param[in] op CeedOperator to assemble CeedQFunction 1571 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1572 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1573 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1574 1575 @return An error code: 0 - success, otherwise - failure 1576 1577 @ref User 1578 **/ 1579 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1580 CeedCall(CeedOperatorCheckReady(op)); 1581 1582 if (op->LinearAssembleQFunctionUpdate) { 1583 // Backend version 1584 bool qf_assembled_is_setup; 1585 CeedVector assembled_vec = NULL; 1586 CeedElemRestriction assembled_rstr = NULL; 1587 1588 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1589 if (qf_assembled_is_setup) { 1590 bool update_needed; 1591 1592 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1593 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1594 if (update_needed) { 1595 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1596 } 1597 } else { 1598 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1599 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1600 } 1601 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1602 1603 // Copy reference from internally held copy 1604 *assembled = NULL; 1605 *rstr = NULL; 1606 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1607 CeedCall(CeedVectorDestroy(&assembled_vec)); 1608 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1609 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1610 } else { 1611 // Operator fallback 1612 CeedOperator op_fallback; 1613 1614 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1615 if (op_fallback) { 1616 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1617 } else { 1618 // LCOV_EXCL_START 1619 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1620 // LCOV_EXCL_STOP 1621 } 1622 } 1623 1624 return CEED_ERROR_SUCCESS; 1625 } 1626 1627 /** 1628 @brief Assemble the diagonal of a square linear CeedOperator 1629 1630 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1631 1632 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1633 1634 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1635 1636 @param[in] op CeedOperator to assemble CeedQFunction 1637 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1638 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1639 1640 @return An error code: 0 - success, otherwise - failure 1641 1642 @ref User 1643 **/ 1644 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1645 bool is_composite; 1646 CeedCall(CeedOperatorCheckReady(op)); 1647 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1648 1649 CeedSize input_size = 0, output_size = 0; 1650 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1651 if (input_size != output_size) { 1652 // LCOV_EXCL_START 1653 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1654 // LCOV_EXCL_STOP 1655 } 1656 1657 // Early exit for empty operator 1658 if (!is_composite) { 1659 CeedInt num_elem = 0; 1660 1661 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1662 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1663 } 1664 1665 if (op->LinearAssembleDiagonal) { 1666 // Backend version 1667 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1668 return CEED_ERROR_SUCCESS; 1669 } else if (op->LinearAssembleAddDiagonal) { 1670 // Backend version with zeroing first 1671 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1672 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1673 return CEED_ERROR_SUCCESS; 1674 } else { 1675 // Operator fallback 1676 CeedOperator op_fallback; 1677 1678 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1679 if (op_fallback) { 1680 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1681 return CEED_ERROR_SUCCESS; 1682 } 1683 } 1684 // Default interface implementation 1685 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1686 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1687 1688 return CEED_ERROR_SUCCESS; 1689 } 1690 1691 /** 1692 @brief Assemble the diagonal of a square linear CeedOperator 1693 1694 This sums into a CeedVector the diagonal of a linear CeedOperator. 1695 1696 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1697 1698 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1699 1700 @param[in] op CeedOperator to assemble CeedQFunction 1701 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1702 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1703 1704 @return An error code: 0 - success, otherwise - failure 1705 1706 @ref User 1707 **/ 1708 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1709 bool is_composite; 1710 CeedCall(CeedOperatorCheckReady(op)); 1711 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1712 1713 CeedSize input_size = 0, output_size = 0; 1714 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1715 if (input_size != output_size) { 1716 // LCOV_EXCL_START 1717 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1718 // LCOV_EXCL_STOP 1719 } 1720 1721 // Early exit for empty operator 1722 if (!is_composite) { 1723 CeedInt num_elem = 0; 1724 1725 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1726 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1727 } 1728 1729 if (op->LinearAssembleAddDiagonal) { 1730 // Backend version 1731 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1732 return CEED_ERROR_SUCCESS; 1733 } else { 1734 // Operator fallback 1735 CeedOperator op_fallback; 1736 1737 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1738 if (op_fallback) { 1739 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1740 return CEED_ERROR_SUCCESS; 1741 } 1742 } 1743 // Default interface implementation 1744 if (is_composite) { 1745 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1746 } else { 1747 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1748 } 1749 1750 return CEED_ERROR_SUCCESS; 1751 } 1752 1753 /** 1754 @brief Assemble the point block diagonal of a square linear CeedOperator 1755 1756 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1757 1758 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1759 1760 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1761 1762 @param[in] op CeedOperator to assemble CeedQFunction 1763 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1764 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1765 component in]. 1766 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1767 1768 @return An error code: 0 - success, otherwise - failure 1769 1770 @ref User 1771 **/ 1772 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1773 bool is_composite; 1774 CeedCall(CeedOperatorCheckReady(op)); 1775 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1776 1777 CeedSize input_size = 0, output_size = 0; 1778 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1779 if (input_size != output_size) { 1780 // LCOV_EXCL_START 1781 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1782 // LCOV_EXCL_STOP 1783 } 1784 1785 // Early exit for empty operator 1786 if (!is_composite) { 1787 CeedInt num_elem = 0; 1788 1789 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1790 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1791 } 1792 1793 if (op->LinearAssemblePointBlockDiagonal) { 1794 // Backend version 1795 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1796 return CEED_ERROR_SUCCESS; 1797 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1798 // Backend version with zeroing first 1799 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1800 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1801 return CEED_ERROR_SUCCESS; 1802 } else { 1803 // Operator fallback 1804 CeedOperator op_fallback; 1805 1806 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1807 if (op_fallback) { 1808 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1809 return CEED_ERROR_SUCCESS; 1810 } 1811 } 1812 // Default interface implementation 1813 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1814 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1815 1816 return CEED_ERROR_SUCCESS; 1817 } 1818 1819 /** 1820 @brief Assemble the point block diagonal of a square linear CeedOperator 1821 1822 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1823 1824 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1825 1826 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1827 1828 @param[in] op CeedOperator to assemble CeedQFunction 1829 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1830 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1831 component in]. 1832 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1833 1834 @return An error code: 0 - success, otherwise - failure 1835 1836 @ref User 1837 **/ 1838 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1839 bool is_composite; 1840 CeedCall(CeedOperatorCheckReady(op)); 1841 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1842 1843 CeedSize input_size = 0, output_size = 0; 1844 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1845 if (input_size != output_size) { 1846 // LCOV_EXCL_START 1847 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1848 // LCOV_EXCL_STOP 1849 } 1850 1851 // Early exit for empty operator 1852 if (!is_composite) { 1853 CeedInt num_elem = 0; 1854 1855 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1856 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1857 } 1858 1859 if (op->LinearAssembleAddPointBlockDiagonal) { 1860 // Backend version 1861 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1862 return CEED_ERROR_SUCCESS; 1863 } else { 1864 // Operator fallback 1865 CeedOperator op_fallback; 1866 1867 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1868 if (op_fallback) { 1869 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1870 return CEED_ERROR_SUCCESS; 1871 } 1872 } 1873 // Default interface implementation 1874 if (is_composite) { 1875 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1876 } else { 1877 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1878 } 1879 1880 return CEED_ERROR_SUCCESS; 1881 } 1882 1883 /** 1884 @brief Fully assemble the nonzero pattern of a linear operator. 1885 1886 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1887 1888 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1889 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j) 1890 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1891 1892 This will generally be slow unless your operator is low-order. 1893 1894 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1895 1896 @param[in] op CeedOperator to assemble 1897 @param[out] num_entries Number of entries in coordinate nonzero pattern 1898 @param[out] rows Row number for each entry 1899 @param[out] cols Column number for each entry 1900 1901 @ref User 1902 **/ 1903 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1904 CeedInt num_suboperators, single_entries; 1905 CeedOperator *sub_operators; 1906 bool is_composite; 1907 CeedCall(CeedOperatorCheckReady(op)); 1908 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1909 1910 if (op->LinearAssembleSymbolic) { 1911 // Backend version 1912 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1913 return CEED_ERROR_SUCCESS; 1914 } else { 1915 // Operator fallback 1916 CeedOperator op_fallback; 1917 1918 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1919 if (op_fallback) { 1920 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1921 return CEED_ERROR_SUCCESS; 1922 } 1923 } 1924 1925 // Default interface implementation 1926 1927 // count entries and allocate rows, cols arrays 1928 *num_entries = 0; 1929 if (is_composite) { 1930 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1931 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1932 for (CeedInt k = 0; k < num_suboperators; ++k) { 1933 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1934 *num_entries += single_entries; 1935 } 1936 } else { 1937 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1938 *num_entries += single_entries; 1939 } 1940 CeedCall(CeedCalloc(*num_entries, rows)); 1941 CeedCall(CeedCalloc(*num_entries, cols)); 1942 1943 // assemble nonzero locations 1944 CeedInt offset = 0; 1945 if (is_composite) { 1946 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1947 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1948 for (CeedInt k = 0; k < num_suboperators; ++k) { 1949 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1950 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1951 offset += single_entries; 1952 } 1953 } else { 1954 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1955 } 1956 1957 return CEED_ERROR_SUCCESS; 1958 } 1959 1960 /** 1961 @brief Fully assemble the nonzero entries of a linear operator. 1962 1963 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1964 1965 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1966 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added, 1967 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1968 1969 This will generally be slow unless your operator is low-order. 1970 1971 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1972 1973 @param[in] op CeedOperator to assemble 1974 @param[out] values Values to assemble into matrix 1975 1976 @ref User 1977 **/ 1978 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1979 CeedInt num_suboperators, single_entries = 0; 1980 CeedOperator *sub_operators; 1981 bool is_composite; 1982 CeedCall(CeedOperatorCheckReady(op)); 1983 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1984 1985 // Early exit for empty operator 1986 if (!is_composite) { 1987 CeedInt num_elem = 0; 1988 1989 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1990 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1991 } 1992 1993 if (op->LinearAssemble) { 1994 // Backend version 1995 CeedCall(op->LinearAssemble(op, values)); 1996 return CEED_ERROR_SUCCESS; 1997 } else { 1998 // Operator fallback 1999 CeedOperator op_fallback; 2000 2001 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2002 if (op_fallback) { 2003 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2004 return CEED_ERROR_SUCCESS; 2005 } 2006 } 2007 2008 // Default interface implementation 2009 CeedInt offset = 0; 2010 CeedCall(CeedVectorSetValue(values, 0.0)); 2011 if (is_composite) { 2012 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2013 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2014 for (CeedInt k = 0; k < num_suboperators; k++) { 2015 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2016 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2017 offset += single_entries; 2018 } 2019 } else { 2020 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2021 } 2022 2023 return CEED_ERROR_SUCCESS; 2024 } 2025 2026 /** 2027 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2028 2029 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2030 2031 @param[in] op Composite CeedOperator 2032 @param[in] num_skip_indices Number of suboperators to skip 2033 @param[in] skip_indices Array of indices of suboperators to skip 2034 @param[out] mult Vector to store multiplicity (of size l_size) 2035 2036 @return An error code: 0 - success, otherwise - failure 2037 2038 @ref User 2039 **/ 2040 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2041 CeedCall(CeedOperatorCheckReady(op)); 2042 2043 Ceed ceed; 2044 CeedInt num_suboperators; 2045 CeedSize l_vec_len; 2046 CeedScalar *mult_array; 2047 CeedVector ones_l_vec; 2048 CeedElemRestriction elem_rstr; 2049 CeedOperator *sub_operators; 2050 2051 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2052 2053 // Zero mult vector 2054 CeedCall(CeedVectorSetValue(mult, 0.0)); 2055 2056 // Get suboperators 2057 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2058 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2059 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2060 2061 // Work vector 2062 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2063 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2064 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2065 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2066 2067 // Compute multiplicity across suboperators 2068 for (CeedInt i = 0; i < num_suboperators; i++) { 2069 const CeedScalar *sub_mult_array; 2070 CeedVector sub_mult_l_vec, ones_e_vec; 2071 2072 // -- Check for suboperator to skip 2073 for (CeedInt j = 0; j < num_skip_indices; j++) { 2074 if (skip_indices[j] == i) continue; 2075 } 2076 2077 // -- Sub operator multiplicity 2078 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2079 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2080 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2081 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2082 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2083 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2084 // ---- Flag every node present in the current suboperator 2085 for (CeedInt j = 0; j < l_vec_len; j++) { 2086 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2087 } 2088 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2089 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2090 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2091 } 2092 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2093 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2094 2095 return CEED_ERROR_SUCCESS; 2096 } 2097 2098 /** 2099 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2100 grid interpolation 2101 2102 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2103 2104 @param[in] op_fine Fine grid operator 2105 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2106 @param[in] rstr_coarse Coarse grid restriction 2107 @param[in] basis_coarse Coarse grid active vector basis 2108 @param[out] op_coarse Coarse grid operator 2109 @param[out] op_prolong Coarse to fine operator, or NULL 2110 @param[out] op_restrict Fine to coarse operator, or NULL 2111 2112 @return An error code: 0 - success, otherwise - failure 2113 2114 @ref User 2115 **/ 2116 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2117 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2118 CeedCall(CeedOperatorCheckReady(op_fine)); 2119 2120 // Build prolongation matrix, if required 2121 CeedBasis basis_c_to_f = NULL; 2122 if (op_prolong || op_restrict) { 2123 CeedBasis basis_fine; 2124 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2125 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2126 } 2127 2128 // Core code 2129 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2130 2131 return CEED_ERROR_SUCCESS; 2132 } 2133 2134 /** 2135 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2136 2137 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2138 2139 @param[in] op_fine Fine grid operator 2140 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2141 @param[in] rstr_coarse Coarse grid restriction 2142 @param[in] basis_coarse Coarse grid active vector basis 2143 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2144 @param[out] op_coarse Coarse grid operator 2145 @param[out] op_prolong Coarse to fine operator, or NULL 2146 @param[out] op_restrict Fine to coarse operator, or NULL 2147 2148 @return An error code: 0 - success, otherwise - failure 2149 2150 @ref User 2151 **/ 2152 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2153 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2154 CeedOperator *op_restrict) { 2155 CeedCall(CeedOperatorCheckReady(op_fine)); 2156 Ceed ceed; 2157 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2158 2159 // Check for compatible quadrature spaces 2160 CeedBasis basis_fine; 2161 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2162 CeedInt Q_f, Q_c; 2163 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2164 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2165 if (Q_f != Q_c) { 2166 // LCOV_EXCL_START 2167 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2168 // LCOV_EXCL_STOP 2169 } 2170 2171 // Create coarse to fine basis, if required 2172 CeedBasis basis_c_to_f = NULL; 2173 if (op_prolong || op_restrict) { 2174 // Check if interpolation matrix is provided 2175 if (!interp_c_to_f) { 2176 // LCOV_EXCL_START 2177 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2178 // LCOV_EXCL_STOP 2179 } 2180 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2181 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2182 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2183 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2184 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2185 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2186 CeedScalar *q_ref, *q_weight, *grad; 2187 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2188 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2189 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2190 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2191 CeedCall(CeedFree(&q_ref)); 2192 CeedCall(CeedFree(&q_weight)); 2193 CeedCall(CeedFree(&grad)); 2194 } 2195 2196 // Core code 2197 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2198 return CEED_ERROR_SUCCESS; 2199 } 2200 2201 /** 2202 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2203 2204 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2205 2206 @param[in] op_fine Fine grid operator 2207 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2208 @param[in] rstr_coarse Coarse grid restriction 2209 @param[in] basis_coarse Coarse grid active vector basis 2210 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2211 @param[out] op_coarse Coarse grid operator 2212 @param[out] op_prolong Coarse to fine operator, or NULL 2213 @param[out] op_restrict Fine to coarse operator, or NULL 2214 2215 @return An error code: 0 - success, otherwise - failure 2216 2217 @ref User 2218 **/ 2219 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2220 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2221 CeedOperator *op_restrict) { 2222 CeedCall(CeedOperatorCheckReady(op_fine)); 2223 Ceed ceed; 2224 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2225 2226 // Check for compatible quadrature spaces 2227 CeedBasis basis_fine; 2228 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2229 CeedInt Q_f, Q_c; 2230 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2231 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2232 if (Q_f != Q_c) { 2233 // LCOV_EXCL_START 2234 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2235 // LCOV_EXCL_STOP 2236 } 2237 2238 // Coarse to fine basis 2239 CeedBasis basis_c_to_f = NULL; 2240 if (op_prolong || op_restrict) { 2241 // Check if interpolation matrix is provided 2242 if (!interp_c_to_f) { 2243 // LCOV_EXCL_START 2244 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2245 // LCOV_EXCL_STOP 2246 } 2247 CeedElemTopology topo; 2248 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2249 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2250 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2251 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2252 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2253 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2254 CeedScalar *q_ref, *q_weight, *grad; 2255 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2256 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2257 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2258 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2259 CeedCall(CeedFree(&q_ref)); 2260 CeedCall(CeedFree(&q_weight)); 2261 CeedCall(CeedFree(&grad)); 2262 } 2263 2264 // Core code 2265 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2266 return CEED_ERROR_SUCCESS; 2267 } 2268 2269 /** 2270 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2271 2272 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2273 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2274 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2275 \hat S V\f$. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear. 2276 2277 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2278 2279 @param[in] op CeedOperator to create element inverses 2280 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2281 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2282 2283 @return An error code: 0 - success, otherwise - failure 2284 2285 @ref User 2286 **/ 2287 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2288 CeedCall(CeedOperatorCheckReady(op)); 2289 2290 if (op->CreateFDMElementInverse) { 2291 // Backend version 2292 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2293 return CEED_ERROR_SUCCESS; 2294 } else { 2295 // Operator fallback 2296 CeedOperator op_fallback; 2297 2298 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2299 if (op_fallback) { 2300 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2301 return CEED_ERROR_SUCCESS; 2302 } 2303 } 2304 2305 // Default interface implementation 2306 Ceed ceed, ceed_parent; 2307 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2308 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2309 ceed_parent = ceed_parent ? ceed_parent : ceed; 2310 CeedQFunction qf; 2311 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2312 2313 // Determine active input basis 2314 bool interp = false, grad = false; 2315 CeedBasis basis = NULL; 2316 CeedElemRestriction rstr = NULL; 2317 CeedOperatorField *op_fields; 2318 CeedQFunctionField *qf_fields; 2319 CeedInt num_input_fields; 2320 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2321 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2322 for (CeedInt i = 0; i < num_input_fields; i++) { 2323 CeedVector vec; 2324 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2325 if (vec == CEED_VECTOR_ACTIVE) { 2326 CeedEvalMode eval_mode; 2327 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2328 interp = interp || eval_mode == CEED_EVAL_INTERP; 2329 grad = grad || eval_mode == CEED_EVAL_GRAD; 2330 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2331 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2332 } 2333 } 2334 if (!basis) { 2335 // LCOV_EXCL_START 2336 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2337 // LCOV_EXCL_STOP 2338 } 2339 CeedSize l_size = 1; 2340 CeedInt P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2341 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2342 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2343 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2344 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2345 CeedCall(CeedBasisGetDimension(basis, &dim)); 2346 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2347 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2348 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2349 2350 // Build and diagonalize 1D Mass and Laplacian 2351 bool tensor_basis; 2352 CeedCall(CeedBasisIsTensor(basis, &tensor_basis)); 2353 if (!tensor_basis) { 2354 // LCOV_EXCL_START 2355 return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2356 // LCOV_EXCL_STOP 2357 } 2358 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2359 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2360 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2361 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2362 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2363 CeedCall(CeedCalloc(P_1d, &lambda)); 2364 // -- Build matrices 2365 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2366 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2367 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2368 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2369 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2370 2371 // -- Diagonalize 2372 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2373 CeedCall(CeedFree(&mass)); 2374 CeedCall(CeedFree(&laplace)); 2375 for (CeedInt i = 0; i < P_1d; i++) { 2376 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2377 } 2378 CeedCall(CeedFree(&x)); 2379 2380 // Assemble QFunction 2381 CeedVector assembled; 2382 CeedElemRestriction rstr_qf; 2383 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2384 CeedInt layout[3]; 2385 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2386 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2387 CeedScalar max_norm = 0; 2388 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2389 2390 // Calculate element averages 2391 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2392 CeedScalar *elem_avg; 2393 const CeedScalar *assembled_array, *q_weight_array; 2394 CeedVector q_weight; 2395 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2396 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2397 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2398 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2399 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2400 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2401 for (CeedInt e = 0; e < num_elem; e++) { 2402 CeedInt count = 0; 2403 for (CeedInt q = 0; q < num_qpts; q++) { 2404 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2405 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2406 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2407 count++; 2408 } 2409 } 2410 } 2411 if (count) { 2412 elem_avg[e] /= count; 2413 } else { 2414 elem_avg[e] = 1.0; 2415 } 2416 } 2417 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2418 CeedCall(CeedVectorDestroy(&assembled)); 2419 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2420 CeedCall(CeedVectorDestroy(&q_weight)); 2421 2422 // Build FDM diagonal 2423 CeedVector q_data; 2424 CeedScalar *q_data_array, *fdm_diagonal; 2425 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2426 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2427 for (CeedInt c = 0; c < num_comp; c++) { 2428 for (CeedInt n = 0; n < num_nodes; n++) { 2429 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2430 if (grad) { 2431 for (CeedInt d = 0; d < dim; d++) { 2432 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2433 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2434 } 2435 } 2436 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2437 } 2438 } 2439 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2440 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2441 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2442 for (CeedInt e = 0; e < num_elem; e++) { 2443 for (CeedInt c = 0; c < num_comp; c++) { 2444 for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2445 } 2446 } 2447 CeedCall(CeedFree(&elem_avg)); 2448 CeedCall(CeedFree(&fdm_diagonal)); 2449 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2450 2451 // Setup FDM operator 2452 // -- Basis 2453 CeedBasis fdm_basis; 2454 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2455 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2456 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2457 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2458 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2459 CeedCall(CeedFree(&fdm_interp)); 2460 CeedCall(CeedFree(&grad_dummy)); 2461 CeedCall(CeedFree(&q_ref_dummy)); 2462 CeedCall(CeedFree(&q_weight_dummy)); 2463 CeedCall(CeedFree(&lambda)); 2464 2465 // -- Restriction 2466 CeedElemRestriction rstr_qd_i; 2467 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2468 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2469 // -- QFunction 2470 CeedQFunction qf_fdm; 2471 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2472 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2473 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2474 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2475 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2476 // -- QFunction context 2477 CeedInt *num_comp_data; 2478 CeedCall(CeedCalloc(1, &num_comp_data)); 2479 num_comp_data[0] = num_comp; 2480 CeedQFunctionContext ctx_fdm; 2481 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2482 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2483 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2484 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2485 // -- Operator 2486 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2487 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2488 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2489 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2490 2491 // Cleanup 2492 CeedCall(CeedVectorDestroy(&q_data)); 2493 CeedCall(CeedBasisDestroy(&fdm_basis)); 2494 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2495 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2496 2497 return CEED_ERROR_SUCCESS; 2498 } 2499 2500 /// @} 2501