1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <assert.h> 9 #include <ceed-impl.h> 10 #include <ceed.h> 11 #include <ceed/backend.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] basis CeedBasis from which to get the basis matrix 184 @param[in] eval_mode Current basis evaluation mode 185 @param[in] identity Pointer to identity matrix 186 @param[out] basis_ptr Basis pointer to set 187 188 @ref Developer 189 **/ 190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 191 switch (eval_mode) { 192 case CEED_EVAL_NONE: 193 *basis_ptr = identity; 194 break; 195 case CEED_EVAL_INTERP: 196 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 197 break; 198 case CEED_EVAL_GRAD: 199 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 200 break; 201 case CEED_EVAL_DIV: 202 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 203 break; 204 case CEED_EVAL_CURL: 205 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 206 break; 207 case CEED_EVAL_WEIGHT: 208 break; // Caught by QF Assembly 209 } 210 assert(*basis_ptr != NULL); 211 212 return CEED_ERROR_SUCCESS; 213 } 214 215 /** 216 @brief Create point block restriction for active operator field 217 218 @param[in] rstr Original CeedElemRestriction for active field 219 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 220 221 @return An error code: 0 - success, otherwise - failure 222 223 @ref Developer 224 **/ 225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 226 Ceed ceed; 227 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 228 const CeedInt *offsets; 229 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 230 231 // Expand offsets 232 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 233 CeedSize l_size; 234 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 235 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 236 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 237 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 238 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 239 CeedInt shift = num_comp; 240 if (comp_stride != 1) shift *= num_comp; 241 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 242 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 243 pointblock_offsets[i] = offsets[i] * shift; 244 } 245 246 // Create new restriction 247 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 248 pointblock_offsets, pointblock_rstr)); 249 250 // Cleanup 251 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 252 253 return CEED_ERROR_SUCCESS; 254 } 255 256 /** 257 @brief Core logic for assembling operator diagonal or point block diagonal 258 259 @param[in] op CeedOperator to assemble point block diagonal 260 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 261 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 262 @param[out] assembled CeedVector to store assembled diagonal 263 264 @return An error code: 0 - success, otherwise - failure 265 266 @ref Developer 267 **/ 268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 269 Ceed ceed; 270 CeedCall(CeedOperatorGetCeed(op, &ceed)); 271 272 // Assemble QFunction 273 CeedQFunction qf; 274 const CeedScalar *assembled_qf_array; 275 CeedVector assembled_qf; 276 CeedElemRestriction assembled_elem_rstr; 277 CeedInt num_input_fields, num_output_fields; 278 CeedInt layout[3]; 279 280 CeedCall(CeedOperatorGetQFunction(op, &qf)); 281 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 282 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 283 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 284 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 285 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 286 287 // Get assembly data 288 CeedOperatorAssemblyData data; 289 const CeedEvalMode **eval_modes_in, **eval_modes_out; 290 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 291 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 292 CeedBasis *active_bases; 293 CeedElemRestriction *active_elem_rstrs; 294 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 295 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 296 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 297 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 298 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 299 300 // Loop over all active bases 301 for (CeedInt b = 0; b < num_active_bases; b++) { 302 // Assemble point block diagonal restriction, if needed 303 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 304 305 if (is_pointblock) { 306 CeedElemRestriction point_block_elem_rstr; 307 308 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 309 diag_elem_rstr = point_block_elem_rstr; 310 } 311 312 // Create diagonal vector 313 CeedVector elem_diag; 314 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 315 316 // Assemble element operator diagonals 317 CeedScalar *elem_diag_array; 318 CeedInt num_elem, num_nodes, num_qpts, num_components; 319 320 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 321 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 322 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 323 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 324 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 325 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 326 327 // Construct identity matrix for basis if required 328 bool has_eval_none = false; 329 CeedScalar *identity = NULL; 330 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 331 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 332 } 333 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 334 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 335 } 336 if (has_eval_none) { 337 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 338 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 339 } 340 341 // Compute the diagonal of B^T D B 342 // Each element 343 for (CeedInt e = 0; e < num_elem; e++) { 344 // Each basis eval mode pair 345 CeedInt d_out = 0, q_comp_out; 346 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 347 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 348 const CeedScalar *B_t = NULL; 349 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 350 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 351 if (q_comp_out > 1) { 352 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 353 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 354 } 355 eval_mode_out_prev = eval_modes_out[b][e_out]; 356 357 CeedInt d_in = 0, q_comp_in; 358 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 359 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 360 const CeedScalar *B = NULL; 361 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 362 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 363 if (q_comp_in > 1) { 364 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 365 else B = &B[(++d_in) * num_qpts * num_nodes]; 366 } 367 eval_mode_in_prev = eval_modes_in[b][e_in]; 368 369 // Each component 370 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 371 // Each qpt/node pair 372 for (CeedInt q = 0; q < num_qpts; q++) { 373 if (is_pointblock) { 374 // Point Block Diagonal 375 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 376 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 377 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 378 for (CeedInt n = 0; n < num_nodes; n++) { 379 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 380 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 381 } 382 } 383 } else { 384 // Diagonal Only 385 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 386 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 387 for (CeedInt n = 0; n < num_nodes; n++) { 388 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 389 } 390 } 391 } 392 } 393 } 394 } 395 } 396 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 397 398 // Assemble local operator diagonal 399 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 400 401 // Cleanup 402 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 403 CeedCall(CeedVectorDestroy(&elem_diag)); 404 CeedCall(CeedFree(&identity)); 405 } 406 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 407 CeedCall(CeedVectorDestroy(&assembled_qf)); 408 409 return CEED_ERROR_SUCCESS; 410 } 411 412 /** 413 @brief Core logic for assembling composite operator diagonal 414 415 @param[in] op CeedOperator to assemble point block diagonal 416 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 417 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 418 @param[out] assembled CeedVector to store assembled diagonal 419 420 @return An error code: 0 - success, otherwise - failure 421 422 @ref Developer 423 **/ 424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 CeedInt num_sub; 427 CeedOperator *suboperators; 428 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 429 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 430 for (CeedInt i = 0; i < num_sub; i++) { 431 if (is_pointblock) { 432 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 433 } else { 434 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 435 } 436 } 437 return CEED_ERROR_SUCCESS; 438 } 439 440 /** 441 @brief Build nonzero pattern for non-composite operator 442 443 Users should generally use CeedOperatorLinearAssembleSymbolic() 444 445 @param[in] op CeedOperator to assemble nonzero pattern 446 @param[in] offset Offset for number of entries 447 @param[out] rows Row number for each entry 448 @param[out] cols Column number for each entry 449 450 @return An error code: 0 - success, otherwise - failure 451 452 @ref Developer 453 **/ 454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 455 Ceed ceed; 456 bool is_composite; 457 CeedCall(CeedOperatorGetCeed(op, &ceed)); 458 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 459 460 if (is_composite) { 461 // LCOV_EXCL_START 462 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 463 // LCOV_EXCL_STOP 464 } 465 466 CeedSize num_nodes; 467 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 468 CeedElemRestriction rstr_in; 469 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 470 CeedInt num_elem, elem_size, num_comp; 471 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 472 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 473 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 474 CeedInt layout_er[3]; 475 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 476 477 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 478 479 // Determine elem_dof relation 480 CeedVector index_vec; 481 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 482 CeedScalar *array; 483 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 484 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 485 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 486 CeedVector elem_dof; 487 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 488 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 489 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 490 const CeedScalar *elem_dof_a; 491 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 492 CeedCall(CeedVectorDestroy(&index_vec)); 493 494 // Determine i, j locations for element matrices 495 CeedInt count = 0; 496 for (CeedInt e = 0; e < num_elem; e++) { 497 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 498 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 499 for (CeedInt i = 0; i < elem_size; i++) { 500 for (CeedInt j = 0; j < elem_size; j++) { 501 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 502 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 503 504 const CeedInt row = elem_dof_a[elem_dof_index_row]; 505 const CeedInt col = elem_dof_a[elem_dof_index_col]; 506 507 rows[offset + count] = row; 508 cols[offset + count] = col; 509 count++; 510 } 511 } 512 } 513 } 514 } 515 if (count != local_num_entries) { 516 // LCOV_EXCL_START 517 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 // LCOV_EXCL_STOP 519 } 520 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 521 CeedCall(CeedVectorDestroy(&elem_dof)); 522 523 return CEED_ERROR_SUCCESS; 524 } 525 526 /** 527 @brief Assemble nonzero entries for non-composite operator 528 529 Users should generally use CeedOperatorLinearAssemble() 530 531 @param[in] op CeedOperator to assemble 532 @param[in] offset Offset for number of entries 533 @param[out] values Values to assemble into matrix 534 535 @return An error code: 0 - success, otherwise - failure 536 537 @ref Developer 538 **/ 539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 540 Ceed ceed; 541 bool is_composite; 542 CeedCall(CeedOperatorGetCeed(op, &ceed)); 543 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 544 545 if (is_composite) { 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 548 // LCOV_EXCL_STOP 549 } 550 551 // Early exit for empty operator 552 { 553 CeedInt num_elem = 0; 554 555 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 556 if (num_elem == 0) return CEED_ERROR_SUCCESS; 557 } 558 559 if (op->LinearAssembleSingle) { 560 // Backend version 561 CeedCall(op->LinearAssembleSingle(op, offset, values)); 562 return CEED_ERROR_SUCCESS; 563 } else { 564 // Operator fallback 565 CeedOperator op_fallback; 566 567 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 568 if (op_fallback) { 569 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 570 return CEED_ERROR_SUCCESS; 571 } 572 } 573 574 // Assemble QFunction 575 CeedQFunction qf; 576 CeedCall(CeedOperatorGetQFunction(op, &qf)); 577 CeedVector assembled_qf; 578 CeedElemRestriction rstr_q; 579 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 580 CeedSize qf_length; 581 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 582 583 CeedInt num_input_fields, num_output_fields; 584 CeedOperatorField *input_fields; 585 CeedOperatorField *output_fields; 586 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 587 588 // Get assembly data 589 CeedOperatorAssemblyData data; 590 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 591 const CeedEvalMode **eval_modes_in, **eval_modes_out; 592 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 593 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 594 &eval_modes_out, NULL, NULL)); 595 CeedBasis *bases; 596 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 597 CeedBasis basis_in = bases[0]; 598 599 if (num_active_bases > 1) { 600 // LCOV_EXCL_START 601 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 602 // LCOV_EXCL_STOP 603 } 604 if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) { 605 // LCOV_EXCL_START 606 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 607 // LCOV_EXCL_STOP 608 } 609 610 CeedElemRestriction active_rstr; 611 CeedInt num_elem, elem_size, num_qpts, num_comp; 612 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 613 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 614 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 615 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 616 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 617 618 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 619 620 // loop over elements and put in data structure 621 const CeedScalar *assembled_qf_array; 622 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 623 624 CeedInt layout_qf[3]; 625 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 626 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 627 628 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 629 const CeedScalar **B_mats_in, **B_mats_out; 630 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 631 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 632 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 633 CeedScalar elem_mat[elem_size * elem_size]; 634 CeedInt count = 0; 635 CeedScalar *vals; 636 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 637 for (CeedInt e = 0; e < num_elem; e++) { 638 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 639 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 640 // Compute B^T*D 641 for (CeedInt n = 0; n < elem_size; n++) { 642 for (CeedInt q = 0; q < num_qpts; q++) { 643 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 644 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 645 CeedScalar sum = 0.0; 646 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 647 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 648 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 649 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 650 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 651 } 652 BTD_mat[btd_index] = sum; 653 } 654 } 655 } 656 // form element matrix itself (for each block component) 657 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 658 659 // put element matrix in coordinate data structure 660 for (CeedInt i = 0; i < elem_size; i++) { 661 for (CeedInt j = 0; j < elem_size; j++) { 662 vals[offset + count] = elem_mat[i * elem_size + j]; 663 count++; 664 } 665 } 666 } 667 } 668 } 669 if (count != local_num_entries) { 670 // LCOV_EXCL_START 671 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 672 // LCOV_EXCL_STOP 673 } 674 CeedCall(CeedVectorRestoreArray(values, &vals)); 675 676 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 677 CeedCall(CeedVectorDestroy(&assembled_qf)); 678 679 return CEED_ERROR_SUCCESS; 680 } 681 682 /** 683 @brief Count number of entries for assembled CeedOperator 684 685 @param[in] op CeedOperator to assemble 686 @param[out] num_entries Number of entries in assembled representation 687 688 @return An error code: 0 - success, otherwise - failure 689 690 @ref Utility 691 **/ 692 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 693 bool is_composite; 694 CeedElemRestriction rstr; 695 CeedInt num_elem, elem_size, num_comp; 696 697 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 698 if (is_composite) { 699 // LCOV_EXCL_START 700 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 701 // LCOV_EXCL_STOP 702 } 703 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 704 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 705 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 706 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 707 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 708 709 return CEED_ERROR_SUCCESS; 710 } 711 712 /** 713 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 714 715 @param[in] op_fine Fine grid operator 716 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 717 @param[in] rstr_coarse Coarse grid restriction 718 @param[in] basis_coarse Coarse grid active vector basis 719 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 720 @param[out] op_coarse Coarse grid operator 721 @param[out] op_prolong Coarse to fine operator, or NULL 722 @param[out] op_restrict Fine to coarse operator, or NULL 723 724 @return An error code: 0 - success, otherwise - failure 725 726 @ref Developer 727 **/ 728 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 729 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 730 Ceed ceed; 731 CeedVector mult_vec = NULL; 732 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 733 734 // Check for composite operator 735 bool is_composite; 736 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 737 if (is_composite) { 738 // LCOV_EXCL_START 739 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 740 // LCOV_EXCL_STOP 741 } 742 743 // Coarse Grid 744 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 745 CeedElemRestriction rstr_fine = NULL; 746 // -- Clone input fields 747 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 748 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 749 rstr_fine = op_fine->input_fields[i]->elem_rstr; 750 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 751 } else { 752 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 753 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 754 } 755 } 756 // -- Clone output fields 757 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 758 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 759 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 760 } else { 761 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 762 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 763 } 764 } 765 // -- Clone QFunctionAssemblyData 766 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 767 768 // Multiplicity vector 769 if (op_restrict || op_prolong) { 770 CeedVector mult_e_vec; 771 772 if (!p_mult_fine) { 773 // LCOV_EXCL_START 774 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 775 // LCOV_EXCL_STOP 776 } 777 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 778 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 779 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 780 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 781 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 782 CeedCall(CeedVectorDestroy(&mult_e_vec)); 783 CeedCall(CeedVectorReciprocal(mult_vec)); 784 } 785 786 // Clone name 787 bool has_name = op_fine->name; 788 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 789 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 790 791 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 792 if ((op_restrict || op_prolong) && !basis_c_to_f) { 793 // LCOV_EXCL_START 794 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis"); 795 // LCOV_EXCL_STOP 796 } 797 798 // Restriction/Prolongation Operators 799 CeedInt num_comp; 800 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 801 802 // Restriction 803 if (op_restrict) { 804 CeedInt *num_comp_r_data; 805 CeedQFunction qf_restrict; 806 CeedQFunctionContext ctx_r; 807 808 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 809 CeedCall(CeedCalloc(1, &num_comp_r_data)); 810 num_comp_r_data[0] = num_comp; 811 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 812 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 813 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 814 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 815 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 816 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 817 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 818 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 819 820 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 821 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 822 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 823 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 824 825 // Set name 826 char *restriction_name; 827 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 828 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 829 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 830 CeedCall(CeedFree(&restriction_name)); 831 832 // Check 833 CeedCall(CeedOperatorCheckReady(*op_restrict)); 834 835 // Cleanup 836 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 837 } 838 839 // Prolongation 840 if (op_prolong) { 841 CeedInt *num_comp_p_data; 842 CeedQFunction qf_prolong; 843 CeedQFunctionContext ctx_p; 844 845 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 846 CeedCall(CeedCalloc(1, &num_comp_p_data)); 847 num_comp_p_data[0] = num_comp; 848 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 849 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 850 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 851 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 852 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 853 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 854 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 855 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 856 857 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 858 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 859 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 860 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 861 862 // Set name 863 char *prolongation_name; 864 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 865 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 866 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 867 CeedCall(CeedFree(&prolongation_name)); 868 869 // Check 870 CeedCall(CeedOperatorCheckReady(*op_prolong)); 871 872 // Cleanup 873 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 874 } 875 876 // Check 877 CeedCall(CeedOperatorCheckReady(*op_coarse)); 878 879 // Cleanup 880 CeedCall(CeedVectorDestroy(&mult_vec)); 881 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 882 883 return CEED_ERROR_SUCCESS; 884 } 885 886 /** 887 @brief Build 1D mass matrix and Laplacian with perturbation 888 889 @param[in] interp_1d Interpolation matrix in one dimension 890 @param[in] grad_1d Gradient matrix in one dimension 891 @param[in] q_weight_1d Quadrature weights in one dimension 892 @param[in] P_1d Number of basis nodes in one dimension 893 @param[in] Q_1d Number of quadrature points in one dimension 894 @param[in] dim Dimension of basis 895 @param[out] mass Assembled mass matrix in one dimension 896 @param[out] laplace Assembled perturbed Laplacian in one dimension 897 898 @return An error code: 0 - success, otherwise - failure 899 900 @ref Developer 901 **/ 902 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, 903 CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 904 for (CeedInt i = 0; i < P_1d; i++) { 905 for (CeedInt j = 0; j < P_1d; j++) { 906 CeedScalar sum = 0.0; 907 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 908 mass[i + j * P_1d] = sum; 909 } 910 } 911 // -- Laplacian 912 for (CeedInt i = 0; i < P_1d; i++) { 913 for (CeedInt j = 0; j < P_1d; j++) { 914 CeedScalar sum = 0.0; 915 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 916 laplace[i + j * P_1d] = sum; 917 } 918 } 919 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 920 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 921 return CEED_ERROR_SUCCESS; 922 } 923 CeedPragmaOptimizeOn; 924 925 /// @} 926 927 /// ---------------------------------------------------------------------------- 928 /// CeedOperator Backend API 929 /// ---------------------------------------------------------------------------- 930 /// @addtogroup CeedOperatorBackend 931 /// @{ 932 933 /** 934 @brief Create object holding CeedQFunction assembly data for CeedOperator 935 936 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 937 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 938 939 @return An error code: 0 - success, otherwise - failure 940 941 @ref Backend 942 **/ 943 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 944 CeedCall(CeedCalloc(1, data)); 945 (*data)->ref_count = 1; 946 (*data)->ceed = ceed; 947 CeedCall(CeedReference(ceed)); 948 949 return CEED_ERROR_SUCCESS; 950 } 951 952 /** 953 @brief Increment the reference counter for a CeedQFunctionAssemblyData 954 955 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 956 957 @return An error code: 0 - success, otherwise - failure 958 959 @ref Backend 960 **/ 961 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 962 data->ref_count++; 963 return CEED_ERROR_SUCCESS; 964 } 965 966 /** 967 @brief Set re-use of CeedQFunctionAssemblyData 968 969 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 970 @param[in] reuse_data Boolean flag indicating data re-use 971 972 @return An error code: 0 - success, otherwise - failure 973 974 @ref Backend 975 **/ 976 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 977 data->reuse_data = reuse_data; 978 data->needs_data_update = true; 979 return CEED_ERROR_SUCCESS; 980 } 981 982 /** 983 @brief Mark QFunctionAssemblyData as stale 984 985 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 986 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 987 988 @return An error code: 0 - success, otherwise - failure 989 990 @ref Backend 991 **/ 992 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 993 data->needs_data_update = needs_data_update; 994 return CEED_ERROR_SUCCESS; 995 } 996 997 /** 998 @brief Determine if QFunctionAssemblyData needs update 999 1000 @param[in] data CeedQFunctionAssemblyData to mark as stale 1001 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1002 1003 @return An error code: 0 - success, otherwise - failure 1004 1005 @ref Backend 1006 **/ 1007 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1008 *is_update_needed = !data->reuse_data || data->needs_data_update; 1009 return CEED_ERROR_SUCCESS; 1010 } 1011 1012 /** 1013 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1014 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1015 1016 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1017 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1018 CeedQFunctionAssemblyData. 1019 1020 @param[in] data CeedQFunctionAssemblyData to copy reference to 1021 @param[in,out] data_copy Variable to store copied reference 1022 1023 @return An error code: 0 - success, otherwise - failure 1024 1025 @ref Backend 1026 **/ 1027 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1028 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1029 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1030 *data_copy = data; 1031 return CEED_ERROR_SUCCESS; 1032 } 1033 1034 /** 1035 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1036 1037 @param[in] data CeedQFunctionAssemblyData to retrieve status 1038 @param[out] is_setup Boolean flag for setup status 1039 1040 @return An error code: 0 - success, otherwise - failure 1041 1042 @ref Backend 1043 **/ 1044 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1045 *is_setup = data->is_setup; 1046 return CEED_ERROR_SUCCESS; 1047 } 1048 1049 /** 1050 @brief Set internal objects for CeedQFunctionAssemblyData 1051 1052 @param[in,out] data CeedQFunctionAssemblyData to set objects 1053 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1054 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1055 1056 @return An error code: 0 - success, otherwise - failure 1057 1058 @ref Backend 1059 **/ 1060 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1061 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1062 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1063 1064 data->is_setup = true; 1065 return CEED_ERROR_SUCCESS; 1066 } 1067 1068 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1069 if (!data->is_setup) { 1070 // LCOV_EXCL_START 1071 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1072 // LCOV_EXCL_STOP 1073 } 1074 1075 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1076 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1077 1078 return CEED_ERROR_SUCCESS; 1079 } 1080 1081 /** 1082 @brief Destroy CeedQFunctionAssemblyData 1083 1084 @param[in,out] data CeedQFunctionAssemblyData to destroy 1085 1086 @return An error code: 0 - success, otherwise - failure 1087 1088 @ref Backend 1089 **/ 1090 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1091 if (!*data || --(*data)->ref_count > 0) { 1092 *data = NULL; 1093 return CEED_ERROR_SUCCESS; 1094 } 1095 CeedCall(CeedDestroy(&(*data)->ceed)); 1096 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1097 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1098 1099 CeedCall(CeedFree(data)); 1100 return CEED_ERROR_SUCCESS; 1101 } 1102 1103 /** 1104 @brief Get CeedOperatorAssemblyData 1105 1106 @param[in] op CeedOperator to assemble 1107 @param[out] data CeedQFunctionAssemblyData 1108 1109 @return An error code: 0 - success, otherwise - failure 1110 1111 @ref Backend 1112 **/ 1113 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1114 if (!op->op_assembled) { 1115 CeedOperatorAssemblyData data; 1116 1117 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1118 op->op_assembled = data; 1119 } 1120 *data = op->op_assembled; 1121 1122 return CEED_ERROR_SUCCESS; 1123 } 1124 1125 /** 1126 @brief Create object holding CeedOperator assembly data. 1127 1128 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1129 An array with references to the corresponding active CeedElemRestrictions is also stored. 1130 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1131 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1132 CeedEvalMode. 1133 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1134 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1135 1136 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1137 @param[in] op CeedOperator to be assembled 1138 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1139 1140 @return An error code: 0 - success, otherwise - failure 1141 1142 @ref Backend 1143 **/ 1144 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1145 CeedInt num_active_bases = 0; 1146 1147 // Allocate 1148 CeedCall(CeedCalloc(1, data)); 1149 (*data)->ceed = ceed; 1150 CeedCall(CeedReference(ceed)); 1151 1152 // Build OperatorAssembly data 1153 CeedQFunction qf; 1154 CeedQFunctionField *qf_fields; 1155 CeedOperatorField *op_fields; 1156 CeedInt num_input_fields; 1157 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1158 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1159 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1160 1161 // Determine active input basis 1162 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1163 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1164 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1165 for (CeedInt i = 0; i < num_input_fields; i++) { 1166 CeedVector vec; 1167 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1168 if (vec == CEED_VECTOR_ACTIVE) { 1169 CeedBasis basis_in = NULL; 1170 CeedEvalMode eval_mode; 1171 CeedInt index = -1, dim, num_comp, q_comp; 1172 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1173 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1174 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1175 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1176 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1177 for (CeedInt i = 0; i < num_active_bases; i++) { 1178 if ((*data)->active_bases[i] == basis_in) index = i; 1179 } 1180 if (index == -1) { 1181 CeedElemRestriction elem_rstr_in; 1182 index = num_active_bases; 1183 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1184 (*data)->active_bases[num_active_bases] = NULL; 1185 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1186 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1187 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1188 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1189 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1190 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1191 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1192 num_eval_modes_in[index] = 0; 1193 num_eval_modes_out[index] = 0; 1194 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1195 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1196 eval_modes_in[index] = NULL; 1197 eval_modes_out[index] = NULL; 1198 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1199 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1200 eval_mode_offsets_in[index] = NULL; 1201 eval_mode_offsets_out[index] = NULL; 1202 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1203 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1204 (*data)->assembled_bases_in[index] = NULL; 1205 (*data)->assembled_bases_out[index] = NULL; 1206 num_active_bases++; 1207 } 1208 if (eval_mode != CEED_EVAL_WEIGHT) { 1209 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1210 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1211 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1212 for (CeedInt d = 0; d < q_comp; d++) { 1213 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1214 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1215 offset += num_comp; 1216 } 1217 num_eval_modes_in[index] += q_comp; 1218 } 1219 } 1220 } 1221 (*data)->num_eval_modes_in = num_eval_modes_in; 1222 (*data)->eval_modes_in = eval_modes_in; 1223 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1224 1225 // Determine active output basis 1226 CeedInt num_output_fields; 1227 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1228 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1229 offset = 0; 1230 for (CeedInt i = 0; i < num_output_fields; i++) { 1231 CeedVector vec; 1232 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1233 if (vec == CEED_VECTOR_ACTIVE) { 1234 CeedBasis basis_out = NULL; 1235 CeedEvalMode eval_mode; 1236 CeedInt index = -1, dim, num_comp, q_comp; 1237 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1238 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1239 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1240 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1241 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1242 for (CeedInt i = 0; i < num_active_bases; i++) { 1243 if ((*data)->active_bases[i] == basis_out) index = i; 1244 } 1245 if (index == -1) { 1246 CeedElemRestriction elem_rstr_out; 1247 1248 index = num_active_bases; 1249 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1250 (*data)->active_bases[num_active_bases] = NULL; 1251 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1252 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1253 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1254 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1255 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1256 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1257 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1258 num_eval_modes_in[index] = 0; 1259 num_eval_modes_out[index] = 0; 1260 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1261 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1262 eval_modes_in[index] = NULL; 1263 eval_modes_out[index] = NULL; 1264 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1265 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1266 eval_mode_offsets_in[index] = NULL; 1267 eval_mode_offsets_out[index] = NULL; 1268 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1269 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1270 (*data)->assembled_bases_in[index] = NULL; 1271 (*data)->assembled_bases_out[index] = NULL; 1272 num_active_bases++; 1273 } 1274 if (eval_mode != CEED_EVAL_WEIGHT) { 1275 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1276 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1277 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1278 for (CeedInt d = 0; d < q_comp; d++) { 1279 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1280 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1281 offset += num_comp; 1282 } 1283 num_eval_modes_out[index] += q_comp; 1284 } 1285 } 1286 } 1287 (*data)->num_output_components = offset; 1288 (*data)->num_eval_modes_out = num_eval_modes_out; 1289 (*data)->eval_modes_out = eval_modes_out; 1290 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1291 (*data)->num_active_bases = num_active_bases; 1292 1293 return CEED_ERROR_SUCCESS; 1294 } 1295 1296 /** 1297 @brief Get CeedOperator CeedEvalModes for assembly. 1298 1299 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1300 1301 @param[in] data CeedOperatorAssemblyData 1302 @param[out] num_active_bases Total number of active bases 1303 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1304 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1305 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1306 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1307 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1308 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1309 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1310 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1311 including contributions of all active bases 1312 1313 @return An error code: 0 - success, otherwise - failure 1314 1315 1316 @ref Backend 1317 **/ 1318 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1319 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1320 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1321 if (num_active_bases) *num_active_bases = data->num_active_bases; 1322 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1323 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1324 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1325 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1326 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1327 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1328 if (num_output_components) *num_output_components = data->num_output_components; 1329 1330 return CEED_ERROR_SUCCESS; 1331 } 1332 1333 /** 1334 @brief Get CeedOperator CeedBasis data for assembly. 1335 1336 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1337 1338 @param[in] data CeedOperatorAssemblyData 1339 @param[out] num_active_bases Number of active bases, or NULL 1340 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1341 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1342 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1343 1344 @return An error code: 0 - success, otherwise - failure 1345 1346 @ref Backend 1347 **/ 1348 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1349 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1350 // Assemble B_in, B_out if needed 1351 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1352 CeedInt num_qpts; 1353 1354 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1355 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1356 CeedInt num_nodes; 1357 CeedScalar *B_in = NULL, *identity = NULL; 1358 bool has_eval_none = false; 1359 1360 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1361 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1362 1363 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1364 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1365 } 1366 if (has_eval_none) { 1367 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1368 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1369 identity[i * num_nodes + i] = 1.0; 1370 } 1371 } 1372 1373 for (CeedInt q = 0; q < num_qpts; q++) { 1374 for (CeedInt n = 0; n < num_nodes; n++) { 1375 CeedInt d_in = 0, q_comp_in; 1376 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1377 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1378 const CeedInt qq = data->num_eval_modes_in[b] * q; 1379 const CeedScalar *B = NULL; 1380 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1381 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1382 if (q_comp_in > 1) { 1383 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1384 else B = &B[(++d_in) * num_qpts * num_nodes]; 1385 } 1386 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1387 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1388 } 1389 } 1390 } 1391 if (identity) CeedCall(CeedFree(identity)); 1392 data->assembled_bases_in[b] = B_in; 1393 } 1394 } 1395 1396 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1397 CeedInt num_qpts; 1398 1399 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1400 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1401 CeedInt num_nodes; 1402 bool has_eval_none = false; 1403 CeedScalar *B_out = NULL, *identity = NULL; 1404 1405 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1406 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1407 1408 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1409 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1410 } 1411 if (has_eval_none) { 1412 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1413 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1414 identity[i * num_nodes + i] = 1.0; 1415 } 1416 } 1417 1418 for (CeedInt q = 0; q < num_qpts; q++) { 1419 for (CeedInt n = 0; n < num_nodes; n++) { 1420 CeedInt d_out = 0, q_comp_out; 1421 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1422 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1423 const CeedInt qq = data->num_eval_modes_out[b] * q; 1424 const CeedScalar *B = NULL; 1425 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1426 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1427 if (q_comp_out > 1) { 1428 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1429 else B = &B[(++d_out) * num_qpts * num_nodes]; 1430 } 1431 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1432 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1433 } 1434 } 1435 } 1436 if (identity) CeedCall(CeedFree(identity)); 1437 data->assembled_bases_out[b] = B_out; 1438 } 1439 } 1440 1441 // Pass out assembled data 1442 if (active_bases) *active_bases = data->active_bases; 1443 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1444 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1445 1446 return CEED_ERROR_SUCCESS; 1447 } 1448 1449 /** 1450 @brief Get CeedOperator CeedBasis data for assembly. 1451 1452 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1453 1454 @param[in] data CeedOperatorAssemblyData 1455 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1456 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1457 1458 @return An error code: 0 - success, otherwise - failure 1459 1460 @ref Backend 1461 **/ 1462 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1463 CeedElemRestriction **active_elem_rstrs) { 1464 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1465 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1466 1467 return CEED_ERROR_SUCCESS; 1468 } 1469 1470 /** 1471 @brief Destroy CeedOperatorAssemblyData 1472 1473 @param[in,out] data CeedOperatorAssemblyData to destroy 1474 1475 @return An error code: 0 - success, otherwise - failure 1476 1477 @ref Backend 1478 **/ 1479 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1480 if (!*data) { 1481 *data = NULL; 1482 return CEED_ERROR_SUCCESS; 1483 } 1484 CeedCall(CeedDestroy(&(*data)->ceed)); 1485 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1486 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1487 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1488 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1489 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1490 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1491 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1492 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1493 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1494 } 1495 CeedCall(CeedFree(&(*data)->active_bases)); 1496 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1497 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1498 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1499 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1500 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1501 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1502 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1503 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1504 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1505 1506 CeedCall(CeedFree(data)); 1507 return CEED_ERROR_SUCCESS; 1508 } 1509 1510 /// @} 1511 1512 /// ---------------------------------------------------------------------------- 1513 /// CeedOperator Public API 1514 /// ---------------------------------------------------------------------------- 1515 /// @addtogroup CeedOperatorUser 1516 /// @{ 1517 1518 /** 1519 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1520 1521 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1522 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1523 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1524 1525 Inputs and outputs are in the order provided by the 1526 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, 1527 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] 1528 and producing the output [dv_0, dv_1, v]. 1529 1530 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1531 1532 @param[in] op CeedOperator to assemble CeedQFunction 1533 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1534 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1535 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1536 1537 @return An error code: 0 - success, otherwise - failure 1538 1539 @ref User 1540 **/ 1541 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1542 CeedCall(CeedOperatorCheckReady(op)); 1543 1544 if (op->LinearAssembleQFunction) { 1545 // Backend version 1546 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1547 } else { 1548 // Operator fallback 1549 CeedOperator op_fallback; 1550 1551 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1552 if (op_fallback) { 1553 CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1554 } else { 1555 // LCOV_EXCL_START 1556 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1557 // LCOV_EXCL_STOP 1558 } 1559 } 1560 return CEED_ERROR_SUCCESS; 1561 } 1562 1563 /** 1564 @brief Assemble CeedQFunction and store result internally. 1565 Return copied references of stored data to the caller. 1566 Caller is responsible for ownership and destruction of the copied references. 1567 See also @ref CeedOperatorLinearAssembleQFunction 1568 1569 @param[in] op CeedOperator to assemble CeedQFunction 1570 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1571 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1572 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1573 1574 @return An error code: 0 - success, otherwise - failure 1575 1576 @ref User 1577 **/ 1578 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1579 CeedCall(CeedOperatorCheckReady(op)); 1580 1581 if (op->LinearAssembleQFunctionUpdate) { 1582 // Backend version 1583 bool qf_assembled_is_setup; 1584 CeedVector assembled_vec = NULL; 1585 CeedElemRestriction assembled_rstr = NULL; 1586 1587 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1588 if (qf_assembled_is_setup) { 1589 bool update_needed; 1590 1591 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1592 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1593 if (update_needed) { 1594 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1595 } 1596 } else { 1597 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1598 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1599 } 1600 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1601 1602 // Copy reference from internally held copy 1603 *assembled = NULL; 1604 *rstr = NULL; 1605 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1606 CeedCall(CeedVectorDestroy(&assembled_vec)); 1607 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1608 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1609 } else { 1610 // Operator fallback 1611 CeedOperator op_fallback; 1612 1613 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1614 if (op_fallback) { 1615 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1616 } else { 1617 // LCOV_EXCL_START 1618 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1619 // LCOV_EXCL_STOP 1620 } 1621 } 1622 1623 return CEED_ERROR_SUCCESS; 1624 } 1625 1626 /** 1627 @brief Assemble the diagonal of a square linear CeedOperator 1628 1629 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1630 1631 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1632 1633 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1634 1635 @param[in] op CeedOperator to assemble CeedQFunction 1636 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1637 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1638 1639 @return An error code: 0 - success, otherwise - failure 1640 1641 @ref User 1642 **/ 1643 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1644 bool is_composite; 1645 CeedCall(CeedOperatorCheckReady(op)); 1646 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1647 1648 CeedSize input_size = 0, output_size = 0; 1649 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1650 if (input_size != output_size) { 1651 // LCOV_EXCL_START 1652 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1653 // LCOV_EXCL_STOP 1654 } 1655 1656 // Early exit for empty operator 1657 if (!is_composite) { 1658 CeedInt num_elem = 0; 1659 1660 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1661 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1662 } 1663 1664 if (op->LinearAssembleDiagonal) { 1665 // Backend version 1666 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1667 return CEED_ERROR_SUCCESS; 1668 } else if (op->LinearAssembleAddDiagonal) { 1669 // Backend version with zeroing first 1670 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1671 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1672 return CEED_ERROR_SUCCESS; 1673 } else { 1674 // Operator fallback 1675 CeedOperator op_fallback; 1676 1677 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1678 if (op_fallback) { 1679 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1680 return CEED_ERROR_SUCCESS; 1681 } 1682 } 1683 // Default interface implementation 1684 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1685 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1686 1687 return CEED_ERROR_SUCCESS; 1688 } 1689 1690 /** 1691 @brief Assemble the diagonal of a square linear CeedOperator 1692 1693 This sums into a CeedVector the diagonal of a linear CeedOperator. 1694 1695 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1696 1697 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1698 1699 @param[in] op CeedOperator to assemble CeedQFunction 1700 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1701 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1702 1703 @return An error code: 0 - success, otherwise - failure 1704 1705 @ref User 1706 **/ 1707 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1708 bool is_composite; 1709 CeedCall(CeedOperatorCheckReady(op)); 1710 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1711 1712 CeedSize input_size = 0, output_size = 0; 1713 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1714 if (input_size != output_size) { 1715 // LCOV_EXCL_START 1716 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1717 // LCOV_EXCL_STOP 1718 } 1719 1720 // Early exit for empty operator 1721 if (!is_composite) { 1722 CeedInt num_elem = 0; 1723 1724 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1725 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1726 } 1727 1728 if (op->LinearAssembleAddDiagonal) { 1729 // Backend version 1730 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1731 return CEED_ERROR_SUCCESS; 1732 } else { 1733 // Operator fallback 1734 CeedOperator op_fallback; 1735 1736 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1737 if (op_fallback) { 1738 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1739 return CEED_ERROR_SUCCESS; 1740 } 1741 } 1742 // Default interface implementation 1743 if (is_composite) { 1744 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1745 } else { 1746 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1747 } 1748 1749 return CEED_ERROR_SUCCESS; 1750 } 1751 1752 /** 1753 @brief Assemble the point block diagonal of a square linear CeedOperator 1754 1755 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1756 1757 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1758 1759 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1760 1761 @param[in] op CeedOperator to assemble CeedQFunction 1762 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1763 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1764 component in]. 1765 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1766 1767 @return An error code: 0 - success, otherwise - failure 1768 1769 @ref User 1770 **/ 1771 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1772 bool is_composite; 1773 CeedCall(CeedOperatorCheckReady(op)); 1774 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1775 1776 CeedSize input_size = 0, output_size = 0; 1777 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1778 if (input_size != output_size) { 1779 // LCOV_EXCL_START 1780 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1781 // LCOV_EXCL_STOP 1782 } 1783 1784 // Early exit for empty operator 1785 if (!is_composite) { 1786 CeedInt num_elem = 0; 1787 1788 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1789 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1790 } 1791 1792 if (op->LinearAssemblePointBlockDiagonal) { 1793 // Backend version 1794 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1795 return CEED_ERROR_SUCCESS; 1796 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1797 // Backend version with zeroing first 1798 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1799 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1800 return CEED_ERROR_SUCCESS; 1801 } else { 1802 // Operator fallback 1803 CeedOperator op_fallback; 1804 1805 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1806 if (op_fallback) { 1807 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1808 return CEED_ERROR_SUCCESS; 1809 } 1810 } 1811 // Default interface implementation 1812 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1813 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1814 1815 return CEED_ERROR_SUCCESS; 1816 } 1817 1818 /** 1819 @brief Assemble the point block diagonal of a square linear CeedOperator 1820 1821 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1822 1823 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1824 1825 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1826 1827 @param[in] op CeedOperator to assemble CeedQFunction 1828 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1829 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1830 component in]. 1831 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1832 1833 @return An error code: 0 - success, otherwise - failure 1834 1835 @ref User 1836 **/ 1837 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1838 bool is_composite; 1839 CeedCall(CeedOperatorCheckReady(op)); 1840 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1841 1842 CeedSize input_size = 0, output_size = 0; 1843 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1844 if (input_size != output_size) { 1845 // LCOV_EXCL_START 1846 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1847 // LCOV_EXCL_STOP 1848 } 1849 1850 // Early exit for empty operator 1851 if (!is_composite) { 1852 CeedInt num_elem = 0; 1853 1854 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1855 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1856 } 1857 1858 if (op->LinearAssembleAddPointBlockDiagonal) { 1859 // Backend version 1860 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1861 return CEED_ERROR_SUCCESS; 1862 } else { 1863 // Operator fallback 1864 CeedOperator op_fallback; 1865 1866 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1867 if (op_fallback) { 1868 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1869 return CEED_ERROR_SUCCESS; 1870 } 1871 } 1872 // Default interface implementation 1873 if (is_composite) { 1874 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1875 } else { 1876 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1877 } 1878 1879 return CEED_ERROR_SUCCESS; 1880 } 1881 1882 /** 1883 @brief Fully assemble the nonzero pattern of a linear operator. 1884 1885 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1886 1887 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1888 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j) 1889 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1890 1891 This will generally be slow unless your operator is low-order. 1892 1893 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1894 1895 @param[in] op CeedOperator to assemble 1896 @param[out] num_entries Number of entries in coordinate nonzero pattern 1897 @param[out] rows Row number for each entry 1898 @param[out] cols Column number for each entry 1899 1900 @ref User 1901 **/ 1902 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1903 CeedInt num_suboperators, single_entries; 1904 CeedOperator *sub_operators; 1905 bool is_composite; 1906 CeedCall(CeedOperatorCheckReady(op)); 1907 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1908 1909 if (op->LinearAssembleSymbolic) { 1910 // Backend version 1911 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1912 return CEED_ERROR_SUCCESS; 1913 } else { 1914 // Operator fallback 1915 CeedOperator op_fallback; 1916 1917 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1918 if (op_fallback) { 1919 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1920 return CEED_ERROR_SUCCESS; 1921 } 1922 } 1923 1924 // Default interface implementation 1925 1926 // count entries and allocate rows, cols arrays 1927 *num_entries = 0; 1928 if (is_composite) { 1929 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1930 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1931 for (CeedInt k = 0; k < num_suboperators; ++k) { 1932 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1933 *num_entries += single_entries; 1934 } 1935 } else { 1936 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1937 *num_entries += single_entries; 1938 } 1939 CeedCall(CeedCalloc(*num_entries, rows)); 1940 CeedCall(CeedCalloc(*num_entries, cols)); 1941 1942 // assemble nonzero locations 1943 CeedInt offset = 0; 1944 if (is_composite) { 1945 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1946 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1947 for (CeedInt k = 0; k < num_suboperators; ++k) { 1948 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1949 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1950 offset += single_entries; 1951 } 1952 } else { 1953 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1954 } 1955 1956 return CEED_ERROR_SUCCESS; 1957 } 1958 1959 /** 1960 @brief Fully assemble the nonzero entries of a linear operator. 1961 1962 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1963 1964 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1965 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added, 1966 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1967 1968 This will generally be slow unless your operator is low-order. 1969 1970 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1971 1972 @param[in] op CeedOperator to assemble 1973 @param[out] values Values to assemble into matrix 1974 1975 @ref User 1976 **/ 1977 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1978 CeedInt num_suboperators, single_entries = 0; 1979 CeedOperator *sub_operators; 1980 bool is_composite; 1981 CeedCall(CeedOperatorCheckReady(op)); 1982 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1983 1984 // Early exit for empty operator 1985 if (!is_composite) { 1986 CeedInt num_elem = 0; 1987 1988 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1989 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1990 } 1991 1992 if (op->LinearAssemble) { 1993 // Backend version 1994 CeedCall(op->LinearAssemble(op, values)); 1995 return CEED_ERROR_SUCCESS; 1996 } else { 1997 // Operator fallback 1998 CeedOperator op_fallback; 1999 2000 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2001 if (op_fallback) { 2002 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2003 return CEED_ERROR_SUCCESS; 2004 } 2005 } 2006 2007 // Default interface implementation 2008 CeedInt offset = 0; 2009 CeedCall(CeedVectorSetValue(values, 0.0)); 2010 if (is_composite) { 2011 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2012 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2013 for (CeedInt k = 0; k < num_suboperators; k++) { 2014 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2015 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2016 offset += single_entries; 2017 } 2018 } else { 2019 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2020 } 2021 2022 return CEED_ERROR_SUCCESS; 2023 } 2024 2025 /** 2026 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2027 2028 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2029 2030 @param[in] op Composite CeedOperator 2031 @param[in] num_skip_indices Number of suboperators to skip 2032 @param[in] skip_indices Array of indices of suboperators to skip 2033 @param[out] mult Vector to store multiplicity (of size l_size) 2034 2035 @return An error code: 0 - success, otherwise - failure 2036 2037 @ref User 2038 **/ 2039 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2040 CeedCall(CeedOperatorCheckReady(op)); 2041 2042 Ceed ceed; 2043 CeedInt num_suboperators; 2044 CeedSize l_vec_len; 2045 CeedScalar *mult_array; 2046 CeedVector ones_l_vec; 2047 CeedElemRestriction elem_rstr; 2048 CeedOperator *sub_operators; 2049 2050 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2051 2052 // Zero mult vector 2053 CeedCall(CeedVectorSetValue(mult, 0.0)); 2054 2055 // Get suboperators 2056 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2057 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2058 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2059 2060 // Work vector 2061 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2062 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2063 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2064 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2065 2066 // Compute multiplicity across suboperators 2067 for (CeedInt i = 0; i < num_suboperators; i++) { 2068 const CeedScalar *sub_mult_array; 2069 CeedVector sub_mult_l_vec, ones_e_vec; 2070 2071 // -- Check for suboperator to skip 2072 for (CeedInt j = 0; j < num_skip_indices; j++) { 2073 if (skip_indices[j] == i) continue; 2074 } 2075 2076 // -- Sub operator multiplicity 2077 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2078 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2079 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2080 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2081 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2082 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2083 // ---- Flag every node present in the current suboperator 2084 for (CeedInt j = 0; j < l_vec_len; j++) { 2085 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2086 } 2087 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2088 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2089 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2090 } 2091 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2092 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2093 2094 return CEED_ERROR_SUCCESS; 2095 } 2096 2097 /** 2098 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2099 grid interpolation 2100 2101 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2102 2103 @param[in] op_fine Fine grid operator 2104 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2105 @param[in] rstr_coarse Coarse grid restriction 2106 @param[in] basis_coarse Coarse grid active vector basis 2107 @param[out] op_coarse Coarse grid operator 2108 @param[out] op_prolong Coarse to fine operator, or NULL 2109 @param[out] op_restrict Fine to coarse operator, or NULL 2110 2111 @return An error code: 0 - success, otherwise - failure 2112 2113 @ref User 2114 **/ 2115 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2116 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2117 CeedCall(CeedOperatorCheckReady(op_fine)); 2118 2119 // Build prolongation matrix, if required 2120 CeedBasis basis_c_to_f = NULL; 2121 if (op_prolong || op_restrict) { 2122 CeedBasis basis_fine; 2123 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2124 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2125 } 2126 2127 // Core code 2128 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2129 2130 return CEED_ERROR_SUCCESS; 2131 } 2132 2133 /** 2134 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2135 2136 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2137 2138 @param[in] op_fine Fine grid operator 2139 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2140 @param[in] rstr_coarse Coarse grid restriction 2141 @param[in] basis_coarse Coarse grid active vector basis 2142 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2143 @param[out] op_coarse Coarse grid operator 2144 @param[out] op_prolong Coarse to fine operator, or NULL 2145 @param[out] op_restrict Fine to coarse operator, or NULL 2146 2147 @return An error code: 0 - success, otherwise - failure 2148 2149 @ref User 2150 **/ 2151 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2152 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2153 CeedOperator *op_restrict) { 2154 CeedCall(CeedOperatorCheckReady(op_fine)); 2155 Ceed ceed; 2156 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2157 2158 // Check for compatible quadrature spaces 2159 CeedBasis basis_fine; 2160 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2161 CeedInt Q_f, Q_c; 2162 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2163 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2164 if (Q_f != Q_c) { 2165 // LCOV_EXCL_START 2166 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2167 // LCOV_EXCL_STOP 2168 } 2169 2170 // Create coarse to fine basis, if required 2171 CeedBasis basis_c_to_f = NULL; 2172 if (op_prolong || op_restrict) { 2173 // Check if interpolation matrix is provided 2174 if (!interp_c_to_f) { 2175 // LCOV_EXCL_START 2176 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2177 // LCOV_EXCL_STOP 2178 } 2179 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2180 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2181 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2182 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2183 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2184 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2185 CeedScalar *q_ref, *q_weight, *grad; 2186 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2187 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2188 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2189 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2190 CeedCall(CeedFree(&q_ref)); 2191 CeedCall(CeedFree(&q_weight)); 2192 CeedCall(CeedFree(&grad)); 2193 } 2194 2195 // Core code 2196 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2197 return CEED_ERROR_SUCCESS; 2198 } 2199 2200 /** 2201 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2202 2203 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2204 2205 @param[in] op_fine Fine grid operator 2206 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2207 @param[in] rstr_coarse Coarse grid restriction 2208 @param[in] basis_coarse Coarse grid active vector basis 2209 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2210 @param[out] op_coarse Coarse grid operator 2211 @param[out] op_prolong Coarse to fine operator, or NULL 2212 @param[out] op_restrict Fine to coarse operator, or NULL 2213 2214 @return An error code: 0 - success, otherwise - failure 2215 2216 @ref User 2217 **/ 2218 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2219 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2220 CeedOperator *op_restrict) { 2221 CeedCall(CeedOperatorCheckReady(op_fine)); 2222 Ceed ceed; 2223 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2224 2225 // Check for compatible quadrature spaces 2226 CeedBasis basis_fine; 2227 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2228 CeedInt Q_f, Q_c; 2229 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2230 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2231 if (Q_f != Q_c) { 2232 // LCOV_EXCL_START 2233 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2234 // LCOV_EXCL_STOP 2235 } 2236 2237 // Coarse to fine basis 2238 CeedBasis basis_c_to_f = NULL; 2239 if (op_prolong || op_restrict) { 2240 // Check if interpolation matrix is provided 2241 if (!interp_c_to_f) { 2242 // LCOV_EXCL_START 2243 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2244 // LCOV_EXCL_STOP 2245 } 2246 CeedElemTopology topo; 2247 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2248 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2249 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2250 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2251 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2252 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2253 CeedScalar *q_ref, *q_weight, *grad; 2254 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2255 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2256 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2257 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2258 CeedCall(CeedFree(&q_ref)); 2259 CeedCall(CeedFree(&q_weight)); 2260 CeedCall(CeedFree(&grad)); 2261 } 2262 2263 // Core code 2264 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2265 return CEED_ERROR_SUCCESS; 2266 } 2267 2268 /** 2269 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2270 2271 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2272 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2273 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2274 \hat S V\f$. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear. 2275 2276 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2277 2278 @param[in] op CeedOperator to create element inverses 2279 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2280 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2281 2282 @return An error code: 0 - success, otherwise - failure 2283 2284 @ref User 2285 **/ 2286 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2287 CeedCall(CeedOperatorCheckReady(op)); 2288 2289 if (op->CreateFDMElementInverse) { 2290 // Backend version 2291 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2292 return CEED_ERROR_SUCCESS; 2293 } else { 2294 // Operator fallback 2295 CeedOperator op_fallback; 2296 2297 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2298 if (op_fallback) { 2299 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2300 return CEED_ERROR_SUCCESS; 2301 } 2302 } 2303 2304 // Default interface implementation 2305 Ceed ceed, ceed_parent; 2306 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2307 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2308 ceed_parent = ceed_parent ? ceed_parent : ceed; 2309 CeedQFunction qf; 2310 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2311 2312 // Determine active input basis 2313 bool interp = false, grad = false; 2314 CeedBasis basis = NULL; 2315 CeedElemRestriction rstr = NULL; 2316 CeedOperatorField *op_fields; 2317 CeedQFunctionField *qf_fields; 2318 CeedInt num_input_fields; 2319 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2320 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2321 for (CeedInt i = 0; i < num_input_fields; i++) { 2322 CeedVector vec; 2323 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2324 if (vec == CEED_VECTOR_ACTIVE) { 2325 CeedEvalMode eval_mode; 2326 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2327 interp = interp || eval_mode == CEED_EVAL_INTERP; 2328 grad = grad || eval_mode == CEED_EVAL_GRAD; 2329 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2330 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2331 } 2332 } 2333 if (!basis) { 2334 // LCOV_EXCL_START 2335 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2336 // LCOV_EXCL_STOP 2337 } 2338 CeedSize l_size = 1; 2339 CeedInt P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2340 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2341 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2342 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2343 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2344 CeedCall(CeedBasisGetDimension(basis, &dim)); 2345 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2346 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2347 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2348 2349 // Build and diagonalize 1D Mass and Laplacian 2350 bool tensor_basis; 2351 CeedCall(CeedBasisIsTensor(basis, &tensor_basis)); 2352 if (!tensor_basis) { 2353 // LCOV_EXCL_START 2354 return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2355 // LCOV_EXCL_STOP 2356 } 2357 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2358 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2359 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2360 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2361 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2362 CeedCall(CeedCalloc(P_1d, &lambda)); 2363 // -- Build matrices 2364 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2365 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2366 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2367 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2368 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2369 2370 // -- Diagonalize 2371 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2372 CeedCall(CeedFree(&mass)); 2373 CeedCall(CeedFree(&laplace)); 2374 for (CeedInt i = 0; i < P_1d; i++) { 2375 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2376 } 2377 CeedCall(CeedFree(&x)); 2378 2379 // Assemble QFunction 2380 CeedVector assembled; 2381 CeedElemRestriction rstr_qf; 2382 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2383 CeedInt layout[3]; 2384 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2385 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2386 CeedScalar max_norm = 0; 2387 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2388 2389 // Calculate element averages 2390 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2391 CeedScalar *elem_avg; 2392 const CeedScalar *assembled_array, *q_weight_array; 2393 CeedVector q_weight; 2394 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2395 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2396 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2397 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2398 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2399 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2400 for (CeedInt e = 0; e < num_elem; e++) { 2401 CeedInt count = 0; 2402 for (CeedInt q = 0; q < num_qpts; q++) { 2403 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2404 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2405 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2406 count++; 2407 } 2408 } 2409 } 2410 if (count) { 2411 elem_avg[e] /= count; 2412 } else { 2413 elem_avg[e] = 1.0; 2414 } 2415 } 2416 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2417 CeedCall(CeedVectorDestroy(&assembled)); 2418 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2419 CeedCall(CeedVectorDestroy(&q_weight)); 2420 2421 // Build FDM diagonal 2422 CeedVector q_data; 2423 CeedScalar *q_data_array, *fdm_diagonal; 2424 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2425 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2426 for (CeedInt c = 0; c < num_comp; c++) { 2427 for (CeedInt n = 0; n < num_nodes; n++) { 2428 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2429 if (grad) { 2430 for (CeedInt d = 0; d < dim; d++) { 2431 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2432 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2433 } 2434 } 2435 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2436 } 2437 } 2438 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2439 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2440 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2441 for (CeedInt e = 0; e < num_elem; e++) { 2442 for (CeedInt c = 0; c < num_comp; c++) { 2443 for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2444 } 2445 } 2446 CeedCall(CeedFree(&elem_avg)); 2447 CeedCall(CeedFree(&fdm_diagonal)); 2448 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2449 2450 // Setup FDM operator 2451 // -- Basis 2452 CeedBasis fdm_basis; 2453 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2454 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2455 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2456 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2457 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2458 CeedCall(CeedFree(&fdm_interp)); 2459 CeedCall(CeedFree(&grad_dummy)); 2460 CeedCall(CeedFree(&q_ref_dummy)); 2461 CeedCall(CeedFree(&q_weight_dummy)); 2462 CeedCall(CeedFree(&lambda)); 2463 2464 // -- Restriction 2465 CeedElemRestriction rstr_qd_i; 2466 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2467 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2468 // -- QFunction 2469 CeedQFunction qf_fdm; 2470 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2471 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2472 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2473 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2474 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2475 // -- QFunction context 2476 CeedInt *num_comp_data; 2477 CeedCall(CeedCalloc(1, &num_comp_data)); 2478 num_comp_data[0] = num_comp; 2479 CeedQFunctionContext ctx_fdm; 2480 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2481 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2482 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2483 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2484 // -- Operator 2485 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2486 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2487 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2488 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2489 2490 // Cleanup 2491 CeedCall(CeedVectorDestroy(&q_data)); 2492 CeedCall(CeedBasisDestroy(&fdm_basis)); 2493 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2494 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2495 2496 return CEED_ERROR_SUCCESS; 2497 } 2498 2499 /// @} 2500