1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <assert.h> 9 #include <ceed-impl.h> 10 #include <ceed.h> 11 #include <ceed/backend.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] basis CeedBasis from which to get the basis matrix 184 @param[in] eval_mode Current basis evaluation mode 185 @param[in] identity Pointer to identity matrix 186 @param[out] basis_ptr Basis pointer to set 187 188 @ref Developer 189 **/ 190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 191 switch (eval_mode) { 192 case CEED_EVAL_NONE: 193 *basis_ptr = identity; 194 break; 195 case CEED_EVAL_INTERP: 196 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 197 break; 198 case CEED_EVAL_GRAD: 199 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 200 break; 201 case CEED_EVAL_DIV: 202 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 203 break; 204 case CEED_EVAL_CURL: 205 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 206 break; 207 case CEED_EVAL_WEIGHT: 208 break; // Caught by QF Assembly 209 } 210 assert(*basis_ptr != NULL); 211 212 return CEED_ERROR_SUCCESS; 213 } 214 215 /** 216 @brief Create point block restriction for active operator field 217 218 @param[in] rstr Original CeedElemRestriction for active field 219 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 220 221 @return An error code: 0 - success, otherwise - failure 222 223 @ref Developer 224 **/ 225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 226 Ceed ceed; 227 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 228 const CeedInt *offsets; 229 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 230 231 // Expand offsets 232 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 233 CeedSize l_size; 234 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 235 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 236 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 237 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 238 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 239 CeedInt shift = num_comp; 240 if (comp_stride != 1) shift *= num_comp; 241 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 242 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 243 pointblock_offsets[i] = offsets[i] * shift; 244 } 245 246 // Create new restriction 247 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 248 pointblock_offsets, pointblock_rstr)); 249 250 // Cleanup 251 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 252 253 return CEED_ERROR_SUCCESS; 254 } 255 256 /** 257 @brief Core logic for assembling operator diagonal or point block diagonal 258 259 @param[in] op CeedOperator to assemble point block diagonal 260 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 261 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 262 @param[out] assembled CeedVector to store assembled diagonal 263 264 @return An error code: 0 - success, otherwise - failure 265 266 @ref Developer 267 **/ 268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 269 Ceed ceed; 270 CeedCall(CeedOperatorGetCeed(op, &ceed)); 271 272 // Assemble QFunction 273 CeedQFunction qf; 274 const CeedScalar *assembled_qf_array; 275 CeedVector assembled_qf; 276 CeedElemRestriction assembled_elem_rstr; 277 CeedInt num_input_fields, num_output_fields; 278 CeedInt layout[3]; 279 280 CeedCall(CeedOperatorGetQFunction(op, &qf)); 281 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 282 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 283 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 284 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 285 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 286 287 // Get assembly data 288 CeedOperatorAssemblyData data; 289 const CeedEvalMode **eval_modes_in, **eval_modes_out; 290 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 291 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 292 CeedBasis *active_bases; 293 CeedElemRestriction *active_elem_rstrs; 294 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 295 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 296 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 297 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 298 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 299 300 // Loop over all active bases 301 for (CeedInt b = 0; b < num_active_bases; b++) { 302 // Assemble point block diagonal restriction, if needed 303 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 304 305 if (is_pointblock) { 306 CeedElemRestriction point_block_elem_rstr; 307 308 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 309 diag_elem_rstr = point_block_elem_rstr; 310 } 311 312 // Create diagonal vector 313 CeedVector elem_diag; 314 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 315 316 // Assemble element operator diagonals 317 CeedScalar *elem_diag_array; 318 CeedInt num_elem, num_nodes, num_qpts, num_components; 319 320 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 321 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 322 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 323 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 324 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 325 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 326 327 // Construct identity matrix for basis if required 328 bool has_eval_none = false; 329 CeedScalar *identity = NULL; 330 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 331 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 332 } 333 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 334 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 335 } 336 if (has_eval_none) { 337 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 338 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 339 } 340 341 // Compute the diagonal of B^T D B 342 // Each element 343 for (CeedInt e = 0; e < num_elem; e++) { 344 // Each basis eval mode pair 345 CeedInt d_out = 0, q_comp_out; 346 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 347 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 348 const CeedScalar *B_t = NULL; 349 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 350 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 351 if (q_comp_out > 1) { 352 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 353 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 354 } 355 eval_mode_out_prev = eval_modes_out[b][e_out]; 356 357 CeedInt d_in = 0, q_comp_in; 358 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 359 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 360 const CeedScalar *B = NULL; 361 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 362 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 363 if (q_comp_in > 1) { 364 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 365 else B = &B[(++d_in) * num_qpts * num_nodes]; 366 } 367 eval_mode_in_prev = eval_modes_in[b][e_in]; 368 369 // Each component 370 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 371 // Each qpt/node pair 372 for (CeedInt q = 0; q < num_qpts; q++) { 373 if (is_pointblock) { 374 // Point Block Diagonal 375 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 376 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 377 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 378 for (CeedInt n = 0; n < num_nodes; n++) { 379 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 380 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 381 } 382 } 383 } else { 384 // Diagonal Only 385 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 386 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 387 for (CeedInt n = 0; n < num_nodes; n++) { 388 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 389 } 390 } 391 } 392 } 393 } 394 } 395 } 396 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 397 398 // Assemble local operator diagonal 399 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 400 401 // Cleanup 402 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 403 CeedCall(CeedVectorDestroy(&elem_diag)); 404 CeedCall(CeedFree(&identity)); 405 } 406 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 407 CeedCall(CeedVectorDestroy(&assembled_qf)); 408 409 return CEED_ERROR_SUCCESS; 410 } 411 412 /** 413 @brief Core logic for assembling composite operator diagonal 414 415 @param[in] op CeedOperator to assemble point block diagonal 416 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 417 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 418 @param[out] assembled CeedVector to store assembled diagonal 419 420 @return An error code: 0 - success, otherwise - failure 421 422 @ref Developer 423 **/ 424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 CeedInt num_sub; 427 CeedOperator *suboperators; 428 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 429 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 430 for (CeedInt i = 0; i < num_sub; i++) { 431 if (is_pointblock) { 432 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 433 } else { 434 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 435 } 436 } 437 return CEED_ERROR_SUCCESS; 438 } 439 440 /** 441 @brief Build nonzero pattern for non-composite operator 442 443 Users should generally use CeedOperatorLinearAssembleSymbolic() 444 445 @param[in] op CeedOperator to assemble nonzero pattern 446 @param[in] offset Offset for number of entries 447 @param[out] rows Row number for each entry 448 @param[out] cols Column number for each entry 449 450 @return An error code: 0 - success, otherwise - failure 451 452 @ref Developer 453 **/ 454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 455 Ceed ceed; 456 bool is_composite; 457 CeedCall(CeedOperatorGetCeed(op, &ceed)); 458 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 459 460 if (is_composite) { 461 // LCOV_EXCL_START 462 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 463 // LCOV_EXCL_STOP 464 } 465 466 CeedSize num_nodes; 467 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 468 CeedElemRestriction rstr_in; 469 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 470 CeedInt num_elem, elem_size, num_comp; 471 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 472 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 473 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 474 CeedInt layout_er[3]; 475 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 476 477 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 478 479 // Determine elem_dof relation 480 CeedVector index_vec; 481 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 482 CeedScalar *array; 483 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 484 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 485 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 486 CeedVector elem_dof; 487 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 488 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 489 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 490 const CeedScalar *elem_dof_a; 491 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 492 CeedCall(CeedVectorDestroy(&index_vec)); 493 494 // Determine i, j locations for element matrices 495 CeedInt count = 0; 496 for (CeedInt e = 0; e < num_elem; e++) { 497 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 498 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 499 for (CeedInt i = 0; i < elem_size; i++) { 500 for (CeedInt j = 0; j < elem_size; j++) { 501 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 502 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 503 504 const CeedInt row = elem_dof_a[elem_dof_index_row]; 505 const CeedInt col = elem_dof_a[elem_dof_index_col]; 506 507 rows[offset + count] = row; 508 cols[offset + count] = col; 509 count++; 510 } 511 } 512 } 513 } 514 } 515 if (count != local_num_entries) { 516 // LCOV_EXCL_START 517 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 // LCOV_EXCL_STOP 519 } 520 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 521 CeedCall(CeedVectorDestroy(&elem_dof)); 522 523 return CEED_ERROR_SUCCESS; 524 } 525 526 /** 527 @brief Assemble nonzero entries for non-composite operator 528 529 Users should generally use CeedOperatorLinearAssemble() 530 531 @param[in] op CeedOperator to assemble 532 @param[in] offset Offset for number of entries 533 @param[out] values Values to assemble into matrix 534 535 @return An error code: 0 - success, otherwise - failure 536 537 @ref Developer 538 **/ 539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 540 Ceed ceed; 541 bool is_composite; 542 CeedCall(CeedOperatorGetCeed(op, &ceed)); 543 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 544 545 if (is_composite) { 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 548 // LCOV_EXCL_STOP 549 } 550 551 // Early exit for empty operator 552 { 553 CeedInt num_elem = 0; 554 555 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 556 if (num_elem == 0) return CEED_ERROR_SUCCESS; 557 } 558 559 if (op->LinearAssembleSingle) { 560 // Backend version 561 CeedCall(op->LinearAssembleSingle(op, offset, values)); 562 return CEED_ERROR_SUCCESS; 563 } else { 564 // Operator fallback 565 CeedOperator op_fallback; 566 567 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 568 if (op_fallback) { 569 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 570 return CEED_ERROR_SUCCESS; 571 } 572 } 573 574 // Assemble QFunction 575 CeedQFunction qf; 576 CeedCall(CeedOperatorGetQFunction(op, &qf)); 577 CeedVector assembled_qf; 578 CeedElemRestriction rstr_q; 579 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 580 CeedSize qf_length; 581 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 582 583 CeedInt num_input_fields, num_output_fields; 584 CeedOperatorField *input_fields; 585 CeedOperatorField *output_fields; 586 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 587 588 // Get assembly data 589 CeedOperatorAssemblyData data; 590 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 591 const CeedEvalMode **eval_modes_in, **eval_modes_out; 592 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 593 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 594 &eval_modes_out, NULL, NULL)); 595 CeedBasis *bases; 596 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 597 CeedBasis basis_in = bases[0]; 598 599 if (num_active_bases > 1) { 600 // LCOV_EXCL_START 601 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 602 // LCOV_EXCL_STOP 603 } 604 if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) { 605 // LCOV_EXCL_START 606 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 607 // LCOV_EXCL_STOP 608 } 609 610 CeedElemRestriction active_rstr; 611 CeedInt num_elem, elem_size, num_qpts, num_comp; 612 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 613 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 614 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 615 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 616 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 617 618 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 619 620 // loop over elements and put in data structure 621 const CeedScalar *assembled_qf_array; 622 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 623 624 CeedInt layout_qf[3]; 625 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 626 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 627 628 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 629 const CeedScalar **B_mats_in, **B_mats_out; 630 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 631 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 632 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 633 CeedScalar elem_mat[elem_size * elem_size]; 634 CeedInt count = 0; 635 CeedScalar *vals; 636 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 637 for (CeedInt e = 0; e < num_elem; e++) { 638 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 639 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 640 // Compute B^T*D 641 for (CeedInt n = 0; n < elem_size; n++) { 642 for (CeedInt q = 0; q < num_qpts; q++) { 643 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 644 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 645 CeedScalar sum = 0.0; 646 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 647 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 648 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 649 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 650 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 651 } 652 BTD_mat[btd_index] = sum; 653 } 654 } 655 } 656 // form element matrix itself (for each block component) 657 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 658 659 // put element matrix in coordinate data structure 660 for (CeedInt i = 0; i < elem_size; i++) { 661 for (CeedInt j = 0; j < elem_size; j++) { 662 vals[offset + count] = elem_mat[i * elem_size + j]; 663 count++; 664 } 665 } 666 } 667 } 668 } 669 if (count != local_num_entries) { 670 // LCOV_EXCL_START 671 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 672 // LCOV_EXCL_STOP 673 } 674 CeedCall(CeedVectorRestoreArray(values, &vals)); 675 676 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 677 CeedCall(CeedVectorDestroy(&assembled_qf)); 678 679 return CEED_ERROR_SUCCESS; 680 } 681 682 /** 683 @brief Count number of entries for assembled CeedOperator 684 685 @param[in] op CeedOperator to assemble 686 @param[out] num_entries Number of entries in assembled representation 687 688 @return An error code: 0 - success, otherwise - failure 689 690 @ref Utility 691 **/ 692 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 693 bool is_composite; 694 CeedElemRestriction rstr; 695 CeedInt num_elem, elem_size, num_comp; 696 697 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 698 if (is_composite) { 699 // LCOV_EXCL_START 700 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 701 // LCOV_EXCL_STOP 702 } 703 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 704 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 705 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 706 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 707 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 708 709 return CEED_ERROR_SUCCESS; 710 } 711 712 /** 713 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 714 715 @param[in] op_fine Fine grid operator 716 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 717 @param[in] rstr_coarse Coarse grid restriction 718 @param[in] basis_coarse Coarse grid active vector basis 719 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 720 @param[out] op_coarse Coarse grid operator 721 @param[out] op_prolong Coarse to fine operator, or NULL 722 @param[out] op_restrict Fine to coarse operator, or NULL 723 724 @return An error code: 0 - success, otherwise - failure 725 726 @ref Developer 727 **/ 728 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 729 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 730 Ceed ceed; 731 CeedVector mult_vec = NULL; 732 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 733 734 // Check for composite operator 735 bool is_composite; 736 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 737 if (is_composite) { 738 // LCOV_EXCL_START 739 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 740 // LCOV_EXCL_STOP 741 } 742 743 // Coarse Grid 744 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 745 CeedElemRestriction rstr_fine = NULL; 746 // -- Clone input fields 747 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 748 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 749 rstr_fine = op_fine->input_fields[i]->elem_rstr; 750 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 751 } else { 752 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 753 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 754 } 755 } 756 // -- Clone output fields 757 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 758 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 759 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 760 } else { 761 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 762 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 763 } 764 } 765 // -- Clone QFunctionAssemblyData 766 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 767 768 // Multiplicity vector 769 if (op_restrict || op_prolong) { 770 CeedVector mult_e_vec; 771 772 if (!p_mult_fine) { 773 // LCOV_EXCL_START 774 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 775 // LCOV_EXCL_STOP 776 } 777 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 778 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 779 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 780 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 781 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 782 CeedCall(CeedVectorDestroy(&mult_e_vec)); 783 CeedCall(CeedVectorReciprocal(mult_vec)); 784 } 785 786 // Clone name 787 bool has_name = op_fine->name; 788 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 789 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 790 791 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 792 if ((op_restrict || op_prolong) && !basis_c_to_f) { 793 // LCOV_EXCL_START 794 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis"); 795 // LCOV_EXCL_STOP 796 } 797 798 // Restriction/Prolongation Operators 799 CeedInt num_comp; 800 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 801 802 // Restriction 803 if (op_restrict) { 804 CeedInt *num_comp_r_data; 805 CeedQFunction qf_restrict; 806 CeedQFunctionContext ctx_r; 807 808 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 809 CeedCall(CeedCalloc(1, &num_comp_r_data)); 810 num_comp_r_data[0] = num_comp; 811 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 812 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 813 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 814 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 815 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 816 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 817 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 818 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 819 820 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 821 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 822 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 823 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 824 825 // Set name 826 char *restriction_name; 827 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 828 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 829 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 830 CeedCall(CeedFree(&restriction_name)); 831 832 // Check 833 CeedCall(CeedOperatorCheckReady(*op_restrict)); 834 835 // Cleanup 836 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 837 } 838 839 // Prolongation 840 if (op_prolong) { 841 CeedInt *num_comp_p_data; 842 CeedQFunction qf_prolong; 843 CeedQFunctionContext ctx_p; 844 845 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 846 CeedCall(CeedCalloc(1, &num_comp_p_data)); 847 num_comp_p_data[0] = num_comp; 848 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 849 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 850 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 851 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 852 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 853 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 854 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 855 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 856 857 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 858 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 859 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 860 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 861 862 // Set name 863 char *prolongation_name; 864 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 865 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 866 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 867 CeedCall(CeedFree(&prolongation_name)); 868 869 // Check 870 CeedCall(CeedOperatorCheckReady(*op_prolong)); 871 872 // Cleanup 873 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 874 } 875 876 // Check 877 CeedCall(CeedOperatorCheckReady(*op_coarse)); 878 879 // Cleanup 880 CeedCall(CeedVectorDestroy(&mult_vec)); 881 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 882 883 return CEED_ERROR_SUCCESS; 884 } 885 886 /** 887 @brief Build 1D mass matrix and Laplacian with perturbation 888 889 @param[in] interp_1d Interpolation matrix in one dimension 890 @param[in] grad_1d Gradient matrix in one dimension 891 @param[in] q_weight_1d Quadrature weights in one dimension 892 @param[in] P_1d Number of basis nodes in one dimension 893 @param[in] Q_1d Number of quadrature points in one dimension 894 @param[in] dim Dimension of basis 895 @param[out] mass Assembled mass matrix in one dimension 896 @param[out] laplace Assembled perturbed Laplacian in one dimension 897 898 @return An error code: 0 - success, otherwise - failure 899 900 @ref Developer 901 **/ 902 CeedPragmaOptimizeOff 903 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 904 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 905 for (CeedInt i = 0; i < P_1d; i++) { 906 for (CeedInt j = 0; j < P_1d; j++) { 907 CeedScalar sum = 0.0; 908 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 909 mass[i + j * P_1d] = sum; 910 } 911 } 912 // -- Laplacian 913 for (CeedInt i = 0; i < P_1d; i++) { 914 for (CeedInt j = 0; j < P_1d; j++) { 915 CeedScalar sum = 0.0; 916 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 917 laplace[i + j * P_1d] = sum; 918 } 919 } 920 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 921 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 922 return CEED_ERROR_SUCCESS; 923 } 924 CeedPragmaOptimizeOn 925 926 /// @} 927 928 /// ---------------------------------------------------------------------------- 929 /// CeedOperator Backend API 930 /// ---------------------------------------------------------------------------- 931 /// @addtogroup CeedOperatorBackend 932 /// @{ 933 934 /** 935 @brief Create object holding CeedQFunction assembly data for CeedOperator 936 937 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 938 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 939 940 @return An error code: 0 - success, otherwise - failure 941 942 @ref Backend 943 **/ 944 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 945 CeedCall(CeedCalloc(1, data)); 946 (*data)->ref_count = 1; 947 (*data)->ceed = ceed; 948 CeedCall(CeedReference(ceed)); 949 950 return CEED_ERROR_SUCCESS; 951 } 952 953 /** 954 @brief Increment the reference counter for a CeedQFunctionAssemblyData 955 956 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 957 958 @return An error code: 0 - success, otherwise - failure 959 960 @ref Backend 961 **/ 962 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 963 data->ref_count++; 964 return CEED_ERROR_SUCCESS; 965 } 966 967 /** 968 @brief Set re-use of CeedQFunctionAssemblyData 969 970 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 971 @param[in] reuse_data Boolean flag indicating data re-use 972 973 @return An error code: 0 - success, otherwise - failure 974 975 @ref Backend 976 **/ 977 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 978 data->reuse_data = reuse_data; 979 data->needs_data_update = true; 980 return CEED_ERROR_SUCCESS; 981 } 982 983 /** 984 @brief Mark QFunctionAssemblyData as stale 985 986 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 987 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 988 989 @return An error code: 0 - success, otherwise - failure 990 991 @ref Backend 992 **/ 993 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 994 data->needs_data_update = needs_data_update; 995 return CEED_ERROR_SUCCESS; 996 } 997 998 /** 999 @brief Determine if QFunctionAssemblyData needs update 1000 1001 @param[in] data CeedQFunctionAssemblyData to mark as stale 1002 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1003 1004 @return An error code: 0 - success, otherwise - failure 1005 1006 @ref Backend 1007 **/ 1008 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1009 *is_update_needed = !data->reuse_data || data->needs_data_update; 1010 return CEED_ERROR_SUCCESS; 1011 } 1012 1013 /** 1014 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1015 1016 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1017 1018 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1019 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1020 CeedQFunctionAssemblyData. 1021 1022 @param[in] data CeedQFunctionAssemblyData to copy reference to 1023 @param[in,out] data_copy Variable to store copied reference 1024 1025 @return An error code: 0 - success, otherwise - failure 1026 1027 @ref Backend 1028 **/ 1029 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1030 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1031 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1032 *data_copy = data; 1033 return CEED_ERROR_SUCCESS; 1034 } 1035 1036 /** 1037 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1038 1039 @param[in] data CeedQFunctionAssemblyData to retrieve status 1040 @param[out] is_setup Boolean flag for setup status 1041 1042 @return An error code: 0 - success, otherwise - failure 1043 1044 @ref Backend 1045 **/ 1046 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1047 *is_setup = data->is_setup; 1048 return CEED_ERROR_SUCCESS; 1049 } 1050 1051 /** 1052 @brief Set internal objects for CeedQFunctionAssemblyData 1053 1054 @param[in,out] data CeedQFunctionAssemblyData to set objects 1055 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1056 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1057 1058 @return An error code: 0 - success, otherwise - failure 1059 1060 @ref Backend 1061 **/ 1062 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1063 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1064 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1065 1066 data->is_setup = true; 1067 return CEED_ERROR_SUCCESS; 1068 } 1069 1070 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1071 if (!data->is_setup) { 1072 // LCOV_EXCL_START 1073 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1074 // LCOV_EXCL_STOP 1075 } 1076 1077 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1078 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1079 1080 return CEED_ERROR_SUCCESS; 1081 } 1082 1083 /** 1084 @brief Destroy CeedQFunctionAssemblyData 1085 1086 @param[in,out] data CeedQFunctionAssemblyData to destroy 1087 1088 @return An error code: 0 - success, otherwise - failure 1089 1090 @ref Backend 1091 **/ 1092 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1093 if (!*data || --(*data)->ref_count > 0) { 1094 *data = NULL; 1095 return CEED_ERROR_SUCCESS; 1096 } 1097 CeedCall(CeedDestroy(&(*data)->ceed)); 1098 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1099 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1100 1101 CeedCall(CeedFree(data)); 1102 return CEED_ERROR_SUCCESS; 1103 } 1104 1105 /** 1106 @brief Get CeedOperatorAssemblyData 1107 1108 @param[in] op CeedOperator to assemble 1109 @param[out] data CeedQFunctionAssemblyData 1110 1111 @return An error code: 0 - success, otherwise - failure 1112 1113 @ref Backend 1114 **/ 1115 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1116 if (!op->op_assembled) { 1117 CeedOperatorAssemblyData data; 1118 1119 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1120 op->op_assembled = data; 1121 } 1122 *data = op->op_assembled; 1123 1124 return CEED_ERROR_SUCCESS; 1125 } 1126 1127 /** 1128 @brief Create object holding CeedOperator assembly data. 1129 1130 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1131 An array with references to the corresponding active CeedElemRestrictions is also stored. 1132 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1133 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1134 CeedEvalMode. The number of input columns across all active bases for the assembled CeedQFunction is also stored. Lastly, the CeedOperatorAssembly 1135 data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1136 1137 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1138 @param[in] op CeedOperator to be assembled 1139 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1140 1141 @return An error code: 0 - success, otherwise - failure 1142 1143 @ref Backend 1144 **/ 1145 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1146 CeedInt num_active_bases = 0; 1147 1148 // Allocate 1149 CeedCall(CeedCalloc(1, data)); 1150 (*data)->ceed = ceed; 1151 CeedCall(CeedReference(ceed)); 1152 1153 // Build OperatorAssembly data 1154 CeedQFunction qf; 1155 CeedQFunctionField *qf_fields; 1156 CeedOperatorField *op_fields; 1157 CeedInt num_input_fields; 1158 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1159 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1160 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1161 1162 // Determine active input basis 1163 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1164 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1165 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1166 for (CeedInt i = 0; i < num_input_fields; i++) { 1167 CeedVector vec; 1168 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1169 if (vec == CEED_VECTOR_ACTIVE) { 1170 CeedBasis basis_in = NULL; 1171 CeedEvalMode eval_mode; 1172 CeedInt index = -1, dim, num_comp, q_comp; 1173 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1174 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1175 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1176 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1177 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1178 for (CeedInt i = 0; i < num_active_bases; i++) { 1179 if ((*data)->active_bases[i] == basis_in) index = i; 1180 } 1181 if (index == -1) { 1182 CeedElemRestriction elem_rstr_in; 1183 index = num_active_bases; 1184 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1185 (*data)->active_bases[num_active_bases] = NULL; 1186 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1187 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1188 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1189 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1190 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1191 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1192 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1193 num_eval_modes_in[index] = 0; 1194 num_eval_modes_out[index] = 0; 1195 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1196 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1197 eval_modes_in[index] = NULL; 1198 eval_modes_out[index] = NULL; 1199 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1200 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1201 eval_mode_offsets_in[index] = NULL; 1202 eval_mode_offsets_out[index] = NULL; 1203 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1204 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1205 (*data)->assembled_bases_in[index] = NULL; 1206 (*data)->assembled_bases_out[index] = NULL; 1207 num_active_bases++; 1208 } 1209 if (eval_mode != CEED_EVAL_WEIGHT) { 1210 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1211 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1212 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1213 for (CeedInt d = 0; d < q_comp; d++) { 1214 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1215 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1216 offset += num_comp; 1217 } 1218 num_eval_modes_in[index] += q_comp; 1219 } 1220 } 1221 } 1222 (*data)->num_eval_modes_in = num_eval_modes_in; 1223 (*data)->eval_modes_in = eval_modes_in; 1224 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1225 1226 // Determine active output basis 1227 CeedInt num_output_fields; 1228 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1229 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1230 offset = 0; 1231 for (CeedInt i = 0; i < num_output_fields; i++) { 1232 CeedVector vec; 1233 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1234 if (vec == CEED_VECTOR_ACTIVE) { 1235 CeedBasis basis_out = NULL; 1236 CeedEvalMode eval_mode; 1237 CeedInt index = -1, dim, num_comp, q_comp; 1238 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1239 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1240 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1241 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1242 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1243 for (CeedInt i = 0; i < num_active_bases; i++) { 1244 if ((*data)->active_bases[i] == basis_out) index = i; 1245 } 1246 if (index == -1) { 1247 CeedElemRestriction elem_rstr_out; 1248 1249 index = num_active_bases; 1250 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1251 (*data)->active_bases[num_active_bases] = NULL; 1252 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1253 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1254 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1255 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1256 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1257 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1258 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1259 num_eval_modes_in[index] = 0; 1260 num_eval_modes_out[index] = 0; 1261 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1262 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1263 eval_modes_in[index] = NULL; 1264 eval_modes_out[index] = NULL; 1265 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1266 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1267 eval_mode_offsets_in[index] = NULL; 1268 eval_mode_offsets_out[index] = NULL; 1269 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1270 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1271 (*data)->assembled_bases_in[index] = NULL; 1272 (*data)->assembled_bases_out[index] = NULL; 1273 num_active_bases++; 1274 } 1275 if (eval_mode != CEED_EVAL_WEIGHT) { 1276 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1277 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1278 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1279 for (CeedInt d = 0; d < q_comp; d++) { 1280 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1281 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1282 offset += num_comp; 1283 } 1284 num_eval_modes_out[index] += q_comp; 1285 } 1286 } 1287 } 1288 (*data)->num_output_components = offset; 1289 (*data)->num_eval_modes_out = num_eval_modes_out; 1290 (*data)->eval_modes_out = eval_modes_out; 1291 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1292 (*data)->num_active_bases = num_active_bases; 1293 1294 return CEED_ERROR_SUCCESS; 1295 } 1296 1297 /** 1298 @brief Get CeedOperator CeedEvalModes for assembly. 1299 1300 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1301 1302 @param[in] data CeedOperatorAssemblyData 1303 @param[out] num_active_bases Total number of active bases 1304 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1305 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1306 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1307 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1308 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1309 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1310 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1311 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1312 including contributions of all active bases 1313 1314 @return An error code: 0 - success, otherwise - failure 1315 1316 1317 @ref Backend 1318 **/ 1319 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1320 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1321 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1322 if (num_active_bases) *num_active_bases = data->num_active_bases; 1323 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1324 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1325 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1326 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1327 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1328 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1329 if (num_output_components) *num_output_components = data->num_output_components; 1330 1331 return CEED_ERROR_SUCCESS; 1332 } 1333 1334 /** 1335 @brief Get CeedOperator CeedBasis data for assembly. 1336 1337 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1338 1339 @param[in] data CeedOperatorAssemblyData 1340 @param[out] num_active_bases Number of active bases, or NULL 1341 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1342 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1343 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1344 1345 @return An error code: 0 - success, otherwise - failure 1346 1347 @ref Backend 1348 **/ 1349 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1350 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1351 // Assemble B_in, B_out if needed 1352 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1353 CeedInt num_qpts; 1354 1355 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1356 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1357 CeedInt num_nodes; 1358 CeedScalar *B_in = NULL, *identity = NULL; 1359 bool has_eval_none = false; 1360 1361 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1362 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1363 1364 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1365 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1366 } 1367 if (has_eval_none) { 1368 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1369 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1370 identity[i * num_nodes + i] = 1.0; 1371 } 1372 } 1373 1374 for (CeedInt q = 0; q < num_qpts; q++) { 1375 for (CeedInt n = 0; n < num_nodes; n++) { 1376 CeedInt d_in = 0, q_comp_in; 1377 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1378 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1379 const CeedInt qq = data->num_eval_modes_in[b] * q; 1380 const CeedScalar *B = NULL; 1381 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1382 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1383 if (q_comp_in > 1) { 1384 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1385 else B = &B[(++d_in) * num_qpts * num_nodes]; 1386 } 1387 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1388 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1389 } 1390 } 1391 } 1392 if (identity) CeedCall(CeedFree(identity)); 1393 data->assembled_bases_in[b] = B_in; 1394 } 1395 } 1396 1397 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1398 CeedInt num_qpts; 1399 1400 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1401 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1402 CeedInt num_nodes; 1403 bool has_eval_none = false; 1404 CeedScalar *B_out = NULL, *identity = NULL; 1405 1406 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1407 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1408 1409 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1410 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1411 } 1412 if (has_eval_none) { 1413 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1414 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1415 identity[i * num_nodes + i] = 1.0; 1416 } 1417 } 1418 1419 for (CeedInt q = 0; q < num_qpts; q++) { 1420 for (CeedInt n = 0; n < num_nodes; n++) { 1421 CeedInt d_out = 0, q_comp_out; 1422 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1423 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1424 const CeedInt qq = data->num_eval_modes_out[b] * q; 1425 const CeedScalar *B = NULL; 1426 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1427 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1428 if (q_comp_out > 1) { 1429 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1430 else B = &B[(++d_out) * num_qpts * num_nodes]; 1431 } 1432 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1433 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1434 } 1435 } 1436 } 1437 if (identity) CeedCall(CeedFree(identity)); 1438 data->assembled_bases_out[b] = B_out; 1439 } 1440 } 1441 1442 // Pass out assembled data 1443 if (active_bases) *active_bases = data->active_bases; 1444 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1445 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1446 1447 return CEED_ERROR_SUCCESS; 1448 } 1449 1450 /** 1451 @brief Get CeedOperator CeedBasis data for assembly. 1452 1453 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1454 1455 @param[in] data CeedOperatorAssemblyData 1456 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1457 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1458 1459 @return An error code: 0 - success, otherwise - failure 1460 1461 @ref Backend 1462 **/ 1463 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1464 CeedElemRestriction **active_elem_rstrs) { 1465 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1466 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1467 1468 return CEED_ERROR_SUCCESS; 1469 } 1470 1471 /** 1472 @brief Destroy CeedOperatorAssemblyData 1473 1474 @param[in,out] data CeedOperatorAssemblyData to destroy 1475 1476 @return An error code: 0 - success, otherwise - failure 1477 1478 @ref Backend 1479 **/ 1480 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1481 if (!*data) { 1482 *data = NULL; 1483 return CEED_ERROR_SUCCESS; 1484 } 1485 CeedCall(CeedDestroy(&(*data)->ceed)); 1486 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1487 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1488 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1489 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1490 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1491 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1492 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1493 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1494 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1495 } 1496 CeedCall(CeedFree(&(*data)->active_bases)); 1497 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1498 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1499 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1500 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1501 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1502 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1503 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1504 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1505 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1506 1507 CeedCall(CeedFree(data)); 1508 return CEED_ERROR_SUCCESS; 1509 } 1510 1511 /// @} 1512 1513 /// ---------------------------------------------------------------------------- 1514 /// CeedOperator Public API 1515 /// ---------------------------------------------------------------------------- 1516 /// @addtogroup CeedOperatorUser 1517 /// @{ 1518 1519 /** 1520 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1521 1522 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1523 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1524 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1525 1526 Inputs and outputs are in the order provided by the 1527 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, 1528 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] 1529 and producing the output [dv_0, dv_1, v]. 1530 1531 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1532 1533 @param[in] op CeedOperator to assemble CeedQFunction 1534 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1535 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1536 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1537 1538 @return An error code: 0 - success, otherwise - failure 1539 1540 @ref User 1541 **/ 1542 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1543 CeedCall(CeedOperatorCheckReady(op)); 1544 1545 if (op->LinearAssembleQFunction) { 1546 // Backend version 1547 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1548 } else { 1549 // Operator fallback 1550 CeedOperator op_fallback; 1551 1552 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1553 if (op_fallback) { 1554 CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1555 } else { 1556 // LCOV_EXCL_START 1557 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1558 // LCOV_EXCL_STOP 1559 } 1560 } 1561 return CEED_ERROR_SUCCESS; 1562 } 1563 1564 /** 1565 @brief Assemble CeedQFunction and store result internally. 1566 1567 Return copied references of stored data to the caller. 1568 Caller is responsible for ownership and destruction of the copied references. 1569 See also @ref CeedOperatorLinearAssembleQFunction 1570 1571 @param[in] op CeedOperator to assemble CeedQFunction 1572 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1573 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1574 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1575 1576 @return An error code: 0 - success, otherwise - failure 1577 1578 @ref User 1579 **/ 1580 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1581 CeedCall(CeedOperatorCheckReady(op)); 1582 1583 if (op->LinearAssembleQFunctionUpdate) { 1584 // Backend version 1585 bool qf_assembled_is_setup; 1586 CeedVector assembled_vec = NULL; 1587 CeedElemRestriction assembled_rstr = NULL; 1588 1589 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1590 if (qf_assembled_is_setup) { 1591 bool update_needed; 1592 1593 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1594 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1595 if (update_needed) { 1596 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1597 } 1598 } else { 1599 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1600 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1601 } 1602 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1603 1604 // Copy reference from internally held copy 1605 *assembled = NULL; 1606 *rstr = NULL; 1607 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1608 CeedCall(CeedVectorDestroy(&assembled_vec)); 1609 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1610 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1611 } else { 1612 // Operator fallback 1613 CeedOperator op_fallback; 1614 1615 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1616 if (op_fallback) { 1617 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1618 } else { 1619 // LCOV_EXCL_START 1620 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1621 // LCOV_EXCL_STOP 1622 } 1623 } 1624 1625 return CEED_ERROR_SUCCESS; 1626 } 1627 1628 /** 1629 @brief Assemble the diagonal of a square linear CeedOperator 1630 1631 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1632 1633 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1634 1635 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1636 1637 @param[in] op CeedOperator to assemble CeedQFunction 1638 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1639 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1640 1641 @return An error code: 0 - success, otherwise - failure 1642 1643 @ref User 1644 **/ 1645 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1646 bool is_composite; 1647 CeedCall(CeedOperatorCheckReady(op)); 1648 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1649 1650 CeedSize input_size = 0, output_size = 0; 1651 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1652 if (input_size != output_size) { 1653 // LCOV_EXCL_START 1654 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1655 // LCOV_EXCL_STOP 1656 } 1657 1658 // Early exit for empty operator 1659 if (!is_composite) { 1660 CeedInt num_elem = 0; 1661 1662 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1663 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1664 } 1665 1666 if (op->LinearAssembleDiagonal) { 1667 // Backend version 1668 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1669 return CEED_ERROR_SUCCESS; 1670 } else if (op->LinearAssembleAddDiagonal) { 1671 // Backend version with zeroing first 1672 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1673 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1674 return CEED_ERROR_SUCCESS; 1675 } else { 1676 // Operator fallback 1677 CeedOperator op_fallback; 1678 1679 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1680 if (op_fallback) { 1681 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1682 return CEED_ERROR_SUCCESS; 1683 } 1684 } 1685 // Default interface implementation 1686 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1687 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1688 1689 return CEED_ERROR_SUCCESS; 1690 } 1691 1692 /** 1693 @brief Assemble the diagonal of a square linear CeedOperator 1694 1695 This sums into a CeedVector the diagonal of a linear CeedOperator. 1696 1697 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1698 1699 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1700 1701 @param[in] op CeedOperator to assemble CeedQFunction 1702 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1703 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1704 1705 @return An error code: 0 - success, otherwise - failure 1706 1707 @ref User 1708 **/ 1709 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1710 bool is_composite; 1711 CeedCall(CeedOperatorCheckReady(op)); 1712 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1713 1714 CeedSize input_size = 0, output_size = 0; 1715 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1716 if (input_size != output_size) { 1717 // LCOV_EXCL_START 1718 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1719 // LCOV_EXCL_STOP 1720 } 1721 1722 // Early exit for empty operator 1723 if (!is_composite) { 1724 CeedInt num_elem = 0; 1725 1726 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1727 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1728 } 1729 1730 if (op->LinearAssembleAddDiagonal) { 1731 // Backend version 1732 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1733 return CEED_ERROR_SUCCESS; 1734 } else { 1735 // Operator fallback 1736 CeedOperator op_fallback; 1737 1738 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1739 if (op_fallback) { 1740 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1741 return CEED_ERROR_SUCCESS; 1742 } 1743 } 1744 // Default interface implementation 1745 if (is_composite) { 1746 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1747 } else { 1748 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1749 } 1750 1751 return CEED_ERROR_SUCCESS; 1752 } 1753 1754 /** 1755 @brief Assemble the point block diagonal of a square linear CeedOperator 1756 1757 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1758 1759 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1760 1761 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1762 1763 @param[in] op CeedOperator to assemble CeedQFunction 1764 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1765 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1766 component in]. 1767 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1768 1769 @return An error code: 0 - success, otherwise - failure 1770 1771 @ref User 1772 **/ 1773 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1774 bool is_composite; 1775 CeedCall(CeedOperatorCheckReady(op)); 1776 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1777 1778 CeedSize input_size = 0, output_size = 0; 1779 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1780 if (input_size != output_size) { 1781 // LCOV_EXCL_START 1782 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1783 // LCOV_EXCL_STOP 1784 } 1785 1786 // Early exit for empty operator 1787 if (!is_composite) { 1788 CeedInt num_elem = 0; 1789 1790 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1791 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1792 } 1793 1794 if (op->LinearAssemblePointBlockDiagonal) { 1795 // Backend version 1796 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1797 return CEED_ERROR_SUCCESS; 1798 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1799 // Backend version with zeroing first 1800 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1801 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1802 return CEED_ERROR_SUCCESS; 1803 } else { 1804 // Operator fallback 1805 CeedOperator op_fallback; 1806 1807 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1808 if (op_fallback) { 1809 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1810 return CEED_ERROR_SUCCESS; 1811 } 1812 } 1813 // Default interface implementation 1814 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1815 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1816 1817 return CEED_ERROR_SUCCESS; 1818 } 1819 1820 /** 1821 @brief Assemble the point block diagonal of a square linear CeedOperator 1822 1823 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1824 1825 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1826 1827 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1828 1829 @param[in] op CeedOperator to assemble CeedQFunction 1830 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1831 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1832 component in]. 1833 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1834 1835 @return An error code: 0 - success, otherwise - failure 1836 1837 @ref User 1838 **/ 1839 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1840 bool is_composite; 1841 CeedCall(CeedOperatorCheckReady(op)); 1842 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1843 1844 CeedSize input_size = 0, output_size = 0; 1845 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1846 if (input_size != output_size) { 1847 // LCOV_EXCL_START 1848 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1849 // LCOV_EXCL_STOP 1850 } 1851 1852 // Early exit for empty operator 1853 if (!is_composite) { 1854 CeedInt num_elem = 0; 1855 1856 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1857 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1858 } 1859 1860 if (op->LinearAssembleAddPointBlockDiagonal) { 1861 // Backend version 1862 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1863 return CEED_ERROR_SUCCESS; 1864 } else { 1865 // Operator fallback 1866 CeedOperator op_fallback; 1867 1868 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1869 if (op_fallback) { 1870 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1871 return CEED_ERROR_SUCCESS; 1872 } 1873 } 1874 // Default interface implementation 1875 if (is_composite) { 1876 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1877 } else { 1878 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1879 } 1880 1881 return CEED_ERROR_SUCCESS; 1882 } 1883 1884 /** 1885 @brief Fully assemble the nonzero pattern of a linear operator. 1886 1887 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1888 1889 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1890 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j) 1891 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1892 1893 This will generally be slow unless your operator is low-order. 1894 1895 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1896 1897 @param[in] op CeedOperator to assemble 1898 @param[out] num_entries Number of entries in coordinate nonzero pattern 1899 @param[out] rows Row number for each entry 1900 @param[out] cols Column number for each entry 1901 1902 @ref User 1903 **/ 1904 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1905 CeedInt num_suboperators, single_entries; 1906 CeedOperator *sub_operators; 1907 bool is_composite; 1908 CeedCall(CeedOperatorCheckReady(op)); 1909 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1910 1911 if (op->LinearAssembleSymbolic) { 1912 // Backend version 1913 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1914 return CEED_ERROR_SUCCESS; 1915 } else { 1916 // Operator fallback 1917 CeedOperator op_fallback; 1918 1919 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1920 if (op_fallback) { 1921 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1922 return CEED_ERROR_SUCCESS; 1923 } 1924 } 1925 1926 // Default interface implementation 1927 1928 // count entries and allocate rows, cols arrays 1929 *num_entries = 0; 1930 if (is_composite) { 1931 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1932 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1933 for (CeedInt k = 0; k < num_suboperators; ++k) { 1934 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1935 *num_entries += single_entries; 1936 } 1937 } else { 1938 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1939 *num_entries += single_entries; 1940 } 1941 CeedCall(CeedCalloc(*num_entries, rows)); 1942 CeedCall(CeedCalloc(*num_entries, cols)); 1943 1944 // assemble nonzero locations 1945 CeedInt offset = 0; 1946 if (is_composite) { 1947 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1948 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1949 for (CeedInt k = 0; k < num_suboperators; ++k) { 1950 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1951 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1952 offset += single_entries; 1953 } 1954 } else { 1955 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1956 } 1957 1958 return CEED_ERROR_SUCCESS; 1959 } 1960 1961 /** 1962 @brief Fully assemble the nonzero entries of a linear operator. 1963 1964 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1965 1966 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1967 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added, 1968 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1969 1970 This will generally be slow unless your operator is low-order. 1971 1972 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1973 1974 @param[in] op CeedOperator to assemble 1975 @param[out] values Values to assemble into matrix 1976 1977 @ref User 1978 **/ 1979 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1980 CeedInt num_suboperators, single_entries = 0; 1981 CeedOperator *sub_operators; 1982 bool is_composite; 1983 CeedCall(CeedOperatorCheckReady(op)); 1984 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1985 1986 // Early exit for empty operator 1987 if (!is_composite) { 1988 CeedInt num_elem = 0; 1989 1990 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1991 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1992 } 1993 1994 if (op->LinearAssemble) { 1995 // Backend version 1996 CeedCall(op->LinearAssemble(op, values)); 1997 return CEED_ERROR_SUCCESS; 1998 } else { 1999 // Operator fallback 2000 CeedOperator op_fallback; 2001 2002 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2003 if (op_fallback) { 2004 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2005 return CEED_ERROR_SUCCESS; 2006 } 2007 } 2008 2009 // Default interface implementation 2010 CeedInt offset = 0; 2011 CeedCall(CeedVectorSetValue(values, 0.0)); 2012 if (is_composite) { 2013 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2014 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2015 for (CeedInt k = 0; k < num_suboperators; k++) { 2016 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2017 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2018 offset += single_entries; 2019 } 2020 } else { 2021 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2022 } 2023 2024 return CEED_ERROR_SUCCESS; 2025 } 2026 2027 /** 2028 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2029 2030 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2031 2032 @param[in] op Composite CeedOperator 2033 @param[in] num_skip_indices Number of suboperators to skip 2034 @param[in] skip_indices Array of indices of suboperators to skip 2035 @param[out] mult Vector to store multiplicity (of size l_size) 2036 2037 @return An error code: 0 - success, otherwise - failure 2038 2039 @ref User 2040 **/ 2041 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2042 CeedCall(CeedOperatorCheckReady(op)); 2043 2044 Ceed ceed; 2045 CeedInt num_suboperators; 2046 CeedSize l_vec_len; 2047 CeedScalar *mult_array; 2048 CeedVector ones_l_vec; 2049 CeedElemRestriction elem_rstr; 2050 CeedOperator *sub_operators; 2051 2052 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2053 2054 // Zero mult vector 2055 CeedCall(CeedVectorSetValue(mult, 0.0)); 2056 2057 // Get suboperators 2058 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2059 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2060 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2061 2062 // Work vector 2063 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2064 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2065 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2066 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2067 2068 // Compute multiplicity across suboperators 2069 for (CeedInt i = 0; i < num_suboperators; i++) { 2070 const CeedScalar *sub_mult_array; 2071 CeedVector sub_mult_l_vec, ones_e_vec; 2072 2073 // -- Check for suboperator to skip 2074 for (CeedInt j = 0; j < num_skip_indices; j++) { 2075 if (skip_indices[j] == i) continue; 2076 } 2077 2078 // -- Sub operator multiplicity 2079 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2080 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2081 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2082 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2083 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2084 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2085 // ---- Flag every node present in the current suboperator 2086 for (CeedInt j = 0; j < l_vec_len; j++) { 2087 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2088 } 2089 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2090 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2091 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2092 } 2093 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2094 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2095 2096 return CEED_ERROR_SUCCESS; 2097 } 2098 2099 /** 2100 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2101 grid interpolation 2102 2103 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2104 2105 @param[in] op_fine Fine grid operator 2106 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2107 @param[in] rstr_coarse Coarse grid restriction 2108 @param[in] basis_coarse Coarse grid active vector basis 2109 @param[out] op_coarse Coarse grid operator 2110 @param[out] op_prolong Coarse to fine operator, or NULL 2111 @param[out] op_restrict Fine to coarse operator, or NULL 2112 2113 @return An error code: 0 - success, otherwise - failure 2114 2115 @ref User 2116 **/ 2117 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2118 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2119 CeedCall(CeedOperatorCheckReady(op_fine)); 2120 2121 // Build prolongation matrix, if required 2122 CeedBasis basis_c_to_f = NULL; 2123 if (op_prolong || op_restrict) { 2124 CeedBasis basis_fine; 2125 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2126 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2127 } 2128 2129 // Core code 2130 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2131 2132 return CEED_ERROR_SUCCESS; 2133 } 2134 2135 /** 2136 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2137 2138 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2139 2140 @param[in] op_fine Fine grid operator 2141 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2142 @param[in] rstr_coarse Coarse grid restriction 2143 @param[in] basis_coarse Coarse grid active vector basis 2144 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2145 @param[out] op_coarse Coarse grid operator 2146 @param[out] op_prolong Coarse to fine operator, or NULL 2147 @param[out] op_restrict Fine to coarse operator, or NULL 2148 2149 @return An error code: 0 - success, otherwise - failure 2150 2151 @ref User 2152 **/ 2153 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2154 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2155 CeedOperator *op_restrict) { 2156 CeedCall(CeedOperatorCheckReady(op_fine)); 2157 Ceed ceed; 2158 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2159 2160 // Check for compatible quadrature spaces 2161 CeedBasis basis_fine; 2162 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2163 CeedInt Q_f, Q_c; 2164 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2165 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2166 if (Q_f != Q_c) { 2167 // LCOV_EXCL_START 2168 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2169 // LCOV_EXCL_STOP 2170 } 2171 2172 // Create coarse to fine basis, if required 2173 CeedBasis basis_c_to_f = NULL; 2174 if (op_prolong || op_restrict) { 2175 // Check if interpolation matrix is provided 2176 if (!interp_c_to_f) { 2177 // LCOV_EXCL_START 2178 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2179 // LCOV_EXCL_STOP 2180 } 2181 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2182 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2183 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2184 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2185 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2186 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2187 CeedScalar *q_ref, *q_weight, *grad; 2188 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2189 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2190 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2191 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2192 CeedCall(CeedFree(&q_ref)); 2193 CeedCall(CeedFree(&q_weight)); 2194 CeedCall(CeedFree(&grad)); 2195 } 2196 2197 // Core code 2198 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2199 return CEED_ERROR_SUCCESS; 2200 } 2201 2202 /** 2203 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2204 2205 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2206 2207 @param[in] op_fine Fine grid operator 2208 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2209 @param[in] rstr_coarse Coarse grid restriction 2210 @param[in] basis_coarse Coarse grid active vector basis 2211 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2212 @param[out] op_coarse Coarse grid operator 2213 @param[out] op_prolong Coarse to fine operator, or NULL 2214 @param[out] op_restrict Fine to coarse operator, or NULL 2215 2216 @return An error code: 0 - success, otherwise - failure 2217 2218 @ref User 2219 **/ 2220 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2221 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2222 CeedOperator *op_restrict) { 2223 CeedCall(CeedOperatorCheckReady(op_fine)); 2224 Ceed ceed; 2225 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2226 2227 // Check for compatible quadrature spaces 2228 CeedBasis basis_fine; 2229 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2230 CeedInt Q_f, Q_c; 2231 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2232 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2233 if (Q_f != Q_c) { 2234 // LCOV_EXCL_START 2235 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2236 // LCOV_EXCL_STOP 2237 } 2238 2239 // Coarse to fine basis 2240 CeedBasis basis_c_to_f = NULL; 2241 if (op_prolong || op_restrict) { 2242 // Check if interpolation matrix is provided 2243 if (!interp_c_to_f) { 2244 // LCOV_EXCL_START 2245 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2246 // LCOV_EXCL_STOP 2247 } 2248 CeedElemTopology topo; 2249 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2250 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2251 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2252 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2253 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2254 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2255 CeedScalar *q_ref, *q_weight, *grad; 2256 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2257 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2258 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2259 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2260 CeedCall(CeedFree(&q_ref)); 2261 CeedCall(CeedFree(&q_weight)); 2262 CeedCall(CeedFree(&grad)); 2263 } 2264 2265 // Core code 2266 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2267 return CEED_ERROR_SUCCESS; 2268 } 2269 2270 /** 2271 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2272 2273 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2274 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2275 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2276 \hat S V\f$. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear. 2277 2278 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2279 2280 @param[in] op CeedOperator to create element inverses 2281 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2282 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2283 2284 @return An error code: 0 - success, otherwise - failure 2285 2286 @ref User 2287 **/ 2288 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2289 CeedCall(CeedOperatorCheckReady(op)); 2290 2291 if (op->CreateFDMElementInverse) { 2292 // Backend version 2293 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2294 return CEED_ERROR_SUCCESS; 2295 } else { 2296 // Operator fallback 2297 CeedOperator op_fallback; 2298 2299 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2300 if (op_fallback) { 2301 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2302 return CEED_ERROR_SUCCESS; 2303 } 2304 } 2305 2306 // Default interface implementation 2307 Ceed ceed, ceed_parent; 2308 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2309 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2310 ceed_parent = ceed_parent ? ceed_parent : ceed; 2311 CeedQFunction qf; 2312 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2313 2314 // Determine active input basis 2315 bool interp = false, grad = false; 2316 CeedBasis basis = NULL; 2317 CeedElemRestriction rstr = NULL; 2318 CeedOperatorField *op_fields; 2319 CeedQFunctionField *qf_fields; 2320 CeedInt num_input_fields; 2321 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2322 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2323 for (CeedInt i = 0; i < num_input_fields; i++) { 2324 CeedVector vec; 2325 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2326 if (vec == CEED_VECTOR_ACTIVE) { 2327 CeedEvalMode eval_mode; 2328 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2329 interp = interp || eval_mode == CEED_EVAL_INTERP; 2330 grad = grad || eval_mode == CEED_EVAL_GRAD; 2331 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2332 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2333 } 2334 } 2335 if (!basis) { 2336 // LCOV_EXCL_START 2337 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2338 // LCOV_EXCL_STOP 2339 } 2340 CeedSize l_size = 1; 2341 CeedInt P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2342 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2343 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2344 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2345 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2346 CeedCall(CeedBasisGetDimension(basis, &dim)); 2347 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2348 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2349 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2350 2351 // Build and diagonalize 1D Mass and Laplacian 2352 bool tensor_basis; 2353 CeedCall(CeedBasisIsTensor(basis, &tensor_basis)); 2354 if (!tensor_basis) { 2355 // LCOV_EXCL_START 2356 return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2357 // LCOV_EXCL_STOP 2358 } 2359 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2360 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2361 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2362 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2363 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2364 CeedCall(CeedCalloc(P_1d, &lambda)); 2365 // -- Build matrices 2366 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2367 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2368 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2369 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2370 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2371 2372 // -- Diagonalize 2373 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2374 CeedCall(CeedFree(&mass)); 2375 CeedCall(CeedFree(&laplace)); 2376 for (CeedInt i = 0; i < P_1d; i++) { 2377 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2378 } 2379 CeedCall(CeedFree(&x)); 2380 2381 // Assemble QFunction 2382 CeedVector assembled; 2383 CeedElemRestriction rstr_qf; 2384 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2385 CeedInt layout[3]; 2386 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2387 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2388 CeedScalar max_norm = 0; 2389 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2390 2391 // Calculate element averages 2392 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2393 CeedScalar *elem_avg; 2394 const CeedScalar *assembled_array, *q_weight_array; 2395 CeedVector q_weight; 2396 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2397 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2398 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2399 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2400 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2401 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2402 for (CeedInt e = 0; e < num_elem; e++) { 2403 CeedInt count = 0; 2404 for (CeedInt q = 0; q < num_qpts; q++) { 2405 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2406 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2407 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2408 count++; 2409 } 2410 } 2411 } 2412 if (count) { 2413 elem_avg[e] /= count; 2414 } else { 2415 elem_avg[e] = 1.0; 2416 } 2417 } 2418 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2419 CeedCall(CeedVectorDestroy(&assembled)); 2420 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2421 CeedCall(CeedVectorDestroy(&q_weight)); 2422 2423 // Build FDM diagonal 2424 CeedVector q_data; 2425 CeedScalar *q_data_array, *fdm_diagonal; 2426 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2427 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2428 for (CeedInt c = 0; c < num_comp; c++) { 2429 for (CeedInt n = 0; n < num_nodes; n++) { 2430 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2431 if (grad) { 2432 for (CeedInt d = 0; d < dim; d++) { 2433 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2434 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2435 } 2436 } 2437 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2438 } 2439 } 2440 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2441 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2442 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2443 for (CeedInt e = 0; e < num_elem; e++) { 2444 for (CeedInt c = 0; c < num_comp; c++) { 2445 for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2446 } 2447 } 2448 CeedCall(CeedFree(&elem_avg)); 2449 CeedCall(CeedFree(&fdm_diagonal)); 2450 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2451 2452 // Setup FDM operator 2453 // -- Basis 2454 CeedBasis fdm_basis; 2455 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2456 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2457 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2458 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2459 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2460 CeedCall(CeedFree(&fdm_interp)); 2461 CeedCall(CeedFree(&grad_dummy)); 2462 CeedCall(CeedFree(&q_ref_dummy)); 2463 CeedCall(CeedFree(&q_weight_dummy)); 2464 CeedCall(CeedFree(&lambda)); 2465 2466 // -- Restriction 2467 CeedElemRestriction rstr_qd_i; 2468 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2469 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2470 // -- QFunction 2471 CeedQFunction qf_fdm; 2472 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2473 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2474 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2475 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2476 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2477 // -- QFunction context 2478 CeedInt *num_comp_data; 2479 CeedCall(CeedCalloc(1, &num_comp_data)); 2480 num_comp_data[0] = num_comp; 2481 CeedQFunctionContext ctx_fdm; 2482 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2483 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2484 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2485 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2486 // -- Operator 2487 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2488 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2489 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2490 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2491 2492 // Cleanup 2493 CeedCall(CeedVectorDestroy(&q_data)); 2494 CeedCall(CeedBasisDestroy(&fdm_basis)); 2495 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2496 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2497 2498 return CEED_ERROR_SUCCESS; 2499 } 2500 2501 /// @} 2502