1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] basis CeedBasis from which to get the basis matrix 184 @param[in] eval_mode Current basis evaluation mode 185 @param[in] identity Pointer to identity matrix 186 @param[out] basis_ptr Basis pointer to set 187 188 @ref Developer 189 **/ 190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 191 switch (eval_mode) { 192 case CEED_EVAL_NONE: 193 *basis_ptr = identity; 194 break; 195 case CEED_EVAL_INTERP: 196 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 197 break; 198 case CEED_EVAL_GRAD: 199 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 200 break; 201 case CEED_EVAL_DIV: 202 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 203 break; 204 case CEED_EVAL_CURL: 205 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 206 break; 207 case CEED_EVAL_WEIGHT: 208 break; // Caught by QF Assembly 209 } 210 assert(*basis_ptr != NULL); 211 212 return CEED_ERROR_SUCCESS; 213 } 214 215 /** 216 @brief Create point block restriction for active operator field 217 218 @param[in] rstr Original CeedElemRestriction for active field 219 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 220 221 @return An error code: 0 - success, otherwise - failure 222 223 @ref Developer 224 **/ 225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 226 Ceed ceed; 227 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 228 const CeedInt *offsets; 229 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 230 231 // Expand offsets 232 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 233 CeedSize l_size; 234 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 235 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 236 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 237 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 238 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 239 CeedInt shift = num_comp; 240 if (comp_stride != 1) shift *= num_comp; 241 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 242 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 243 pointblock_offsets[i] = offsets[i] * shift; 244 } 245 246 // Create new restriction 247 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 248 pointblock_offsets, pointblock_rstr)); 249 250 // Cleanup 251 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 252 253 return CEED_ERROR_SUCCESS; 254 } 255 256 /** 257 @brief Core logic for assembling operator diagonal or point block diagonal 258 259 @param[in] op CeedOperator to assemble point block diagonal 260 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 261 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 262 @param[out] assembled CeedVector to store assembled diagonal 263 264 @return An error code: 0 - success, otherwise - failure 265 266 @ref Developer 267 **/ 268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 269 Ceed ceed; 270 CeedCall(CeedOperatorGetCeed(op, &ceed)); 271 272 // Assemble QFunction 273 CeedQFunction qf; 274 const CeedScalar *assembled_qf_array; 275 CeedVector assembled_qf; 276 CeedElemRestriction assembled_elem_rstr; 277 CeedInt num_input_fields, num_output_fields; 278 CeedInt layout[3]; 279 280 CeedCall(CeedOperatorGetQFunction(op, &qf)); 281 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 282 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 283 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 284 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 285 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 286 287 // Get assembly data 288 CeedOperatorAssemblyData data; 289 const CeedEvalMode **eval_modes_in, **eval_modes_out; 290 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 291 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 292 CeedBasis *active_bases; 293 CeedElemRestriction *active_elem_rstrs; 294 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 295 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 296 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 297 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 298 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 299 300 // Loop over all active bases 301 for (CeedInt b = 0; b < num_active_bases; b++) { 302 // Assemble point block diagonal restriction, if needed 303 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 304 305 if (is_pointblock) { 306 CeedElemRestriction point_block_elem_rstr; 307 308 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 309 diag_elem_rstr = point_block_elem_rstr; 310 } 311 312 // Create diagonal vector 313 CeedVector elem_diag; 314 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 315 316 // Assemble element operator diagonals 317 CeedScalar *elem_diag_array; 318 CeedInt num_elem, num_nodes, num_qpts, num_components; 319 320 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 321 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 322 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 323 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 324 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 325 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 326 327 // Construct identity matrix for basis if required 328 bool has_eval_none = false; 329 CeedScalar *identity = NULL; 330 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 331 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 332 } 333 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 334 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 335 } 336 if (has_eval_none) { 337 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 338 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 339 } 340 341 // Compute the diagonal of B^T D B 342 // Each element 343 for (CeedInt e = 0; e < num_elem; e++) { 344 // Each basis eval mode pair 345 CeedInt d_out = 0, q_comp_out; 346 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 347 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 348 const CeedScalar *B_t = NULL; 349 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 350 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 351 if (q_comp_out > 1) { 352 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 353 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 354 } 355 eval_mode_out_prev = eval_modes_out[b][e_out]; 356 357 CeedInt d_in = 0, q_comp_in; 358 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 359 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 360 const CeedScalar *B = NULL; 361 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 362 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 363 if (q_comp_in > 1) { 364 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 365 else B = &B[(++d_in) * num_qpts * num_nodes]; 366 } 367 eval_mode_in_prev = eval_modes_in[b][e_in]; 368 369 // Each component 370 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 371 // Each qpt/node pair 372 for (CeedInt q = 0; q < num_qpts; q++) { 373 if (is_pointblock) { 374 // Point Block Diagonal 375 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 376 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 377 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 378 for (CeedInt n = 0; n < num_nodes; n++) { 379 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 380 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 381 } 382 } 383 } else { 384 // Diagonal Only 385 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 386 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 387 for (CeedInt n = 0; n < num_nodes; n++) { 388 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 389 } 390 } 391 } 392 } 393 } 394 } 395 } 396 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 397 398 // Assemble local operator diagonal 399 CeedCall(CeedElemRestrictionApplyUnsigned(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 400 401 // Cleanup 402 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 403 CeedCall(CeedVectorDestroy(&elem_diag)); 404 CeedCall(CeedFree(&identity)); 405 } 406 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 407 CeedCall(CeedVectorDestroy(&assembled_qf)); 408 409 return CEED_ERROR_SUCCESS; 410 } 411 412 /** 413 @brief Core logic for assembling composite operator diagonal 414 415 @param[in] op CeedOperator to assemble point block diagonal 416 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 417 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 418 @param[out] assembled CeedVector to store assembled diagonal 419 420 @return An error code: 0 - success, otherwise - failure 421 422 @ref Developer 423 **/ 424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 CeedInt num_sub; 427 CeedOperator *suboperators; 428 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 429 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 430 for (CeedInt i = 0; i < num_sub; i++) { 431 if (is_pointblock) { 432 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 433 } else { 434 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 435 } 436 } 437 return CEED_ERROR_SUCCESS; 438 } 439 440 /** 441 @brief Build nonzero pattern for non-composite operator 442 443 Users should generally use CeedOperatorLinearAssembleSymbolic() 444 445 Note: For operators using oriented element restrictions, entries in rows or cols may be negative indicating the assembled value at this nonzero 446 should be negated 447 448 @param[in] op CeedOperator to assemble nonzero pattern 449 @param[in] offset Offset for number of entries 450 @param[out] rows Row number for each entry 451 @param[out] cols Column number for each entry 452 453 @return An error code: 0 - success, otherwise - failure 454 455 @ref Developer 456 **/ 457 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 458 Ceed ceed; 459 bool is_composite; 460 CeedCall(CeedOperatorGetCeed(op, &ceed)); 461 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 462 463 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 464 465 CeedSize num_nodes; 466 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 467 CeedElemRestriction rstr_in; 468 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 469 CeedInt num_elem, elem_size, num_comp; 470 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 471 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 472 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 473 CeedInt layout_er[3]; 474 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 475 476 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 477 478 // Determine elem_dof relation 479 CeedVector index_vec; 480 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 481 CeedScalar *array; 482 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 483 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 484 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 485 CeedVector elem_dof; 486 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 487 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 488 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 489 const CeedScalar *elem_dof_a; 490 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 491 CeedCall(CeedVectorDestroy(&index_vec)); 492 493 // Determine i, j locations for element matrices 494 CeedInt count = 0; 495 for (CeedInt e = 0; e < num_elem; e++) { 496 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 497 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 498 for (CeedInt i = 0; i < elem_size; i++) { 499 for (CeedInt j = 0; j < elem_size; j++) { 500 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 501 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 502 503 const CeedInt row = elem_dof_a[elem_dof_index_row]; 504 const CeedInt col = elem_dof_a[elem_dof_index_col]; 505 506 rows[offset + count] = row; 507 cols[offset + count] = col; 508 count++; 509 } 510 } 511 } 512 } 513 } 514 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 515 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 516 CeedCall(CeedVectorDestroy(&elem_dof)); 517 518 return CEED_ERROR_SUCCESS; 519 } 520 521 /** 522 @brief Assemble nonzero entries for non-composite operator 523 524 Users should generally use CeedOperatorLinearAssemble() 525 526 @param[in] op CeedOperator to assemble 527 @param[in] offset Offset for number of entries 528 @param[out] values Values to assemble into matrix 529 530 @return An error code: 0 - success, otherwise - failure 531 532 @ref Developer 533 **/ 534 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 535 Ceed ceed; 536 bool is_composite; 537 CeedCall(CeedOperatorGetCeed(op, &ceed)); 538 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 539 540 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 541 542 // Early exit for empty operator 543 { 544 CeedInt num_elem = 0; 545 546 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 547 if (num_elem == 0) return CEED_ERROR_SUCCESS; 548 } 549 550 if (op->LinearAssembleSingle) { 551 // Backend version 552 CeedCall(op->LinearAssembleSingle(op, offset, values)); 553 return CEED_ERROR_SUCCESS; 554 } else { 555 // Operator fallback 556 CeedOperator op_fallback; 557 558 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 559 if (op_fallback) { 560 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 561 return CEED_ERROR_SUCCESS; 562 } 563 } 564 565 // Assemble QFunction 566 CeedQFunction qf; 567 CeedCall(CeedOperatorGetQFunction(op, &qf)); 568 CeedVector assembled_qf; 569 CeedElemRestriction rstr_q; 570 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 571 CeedSize qf_length; 572 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 573 574 CeedInt num_input_fields, num_output_fields; 575 CeedOperatorField *input_fields; 576 CeedOperatorField *output_fields; 577 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 578 579 // Get assembly data 580 CeedOperatorAssemblyData data; 581 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 582 const CeedEvalMode **eval_modes_in, **eval_modes_out; 583 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 584 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 585 &eval_modes_out, NULL, NULL)); 586 CeedBasis *bases; 587 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 588 CeedBasis basis_in = bases[0]; 589 590 CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 591 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 592 593 CeedElemRestriction active_rstr; 594 CeedInt num_elem, elem_size, num_qpts, num_comp; 595 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 596 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 597 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 598 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 599 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 600 601 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 602 603 // loop over elements and put in data structure 604 const CeedScalar *assembled_qf_array; 605 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 606 607 CeedInt layout_qf[3]; 608 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 609 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 610 611 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 612 const CeedScalar **B_mats_in, **B_mats_out; 613 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 614 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 615 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 616 CeedScalar elem_mat[elem_size * elem_size]; 617 CeedInt count = 0; 618 CeedScalar *vals; 619 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 620 for (CeedInt e = 0; e < num_elem; e++) { 621 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 622 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 623 // Compute B^T*D 624 for (CeedInt n = 0; n < elem_size; n++) { 625 for (CeedInt q = 0; q < num_qpts; q++) { 626 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 627 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 628 CeedScalar sum = 0.0; 629 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 630 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 631 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 632 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 633 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 634 } 635 BTD_mat[btd_index] = sum; 636 } 637 } 638 } 639 // form element matrix itself (for each block component) 640 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 641 642 // put element matrix in coordinate data structure 643 for (CeedInt i = 0; i < elem_size; i++) { 644 for (CeedInt j = 0; j < elem_size; j++) { 645 vals[offset + count] = elem_mat[i * elem_size + j]; 646 count++; 647 } 648 } 649 } 650 } 651 } 652 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 653 CeedCall(CeedVectorRestoreArray(values, &vals)); 654 655 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 656 CeedCall(CeedVectorDestroy(&assembled_qf)); 657 658 return CEED_ERROR_SUCCESS; 659 } 660 661 /** 662 @brief Count number of entries for assembled CeedOperator 663 664 @param[in] op CeedOperator to assemble 665 @param[out] num_entries Number of entries in assembled representation 666 667 @return An error code: 0 - success, otherwise - failure 668 669 @ref Utility 670 **/ 671 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 672 bool is_composite; 673 CeedElemRestriction rstr; 674 CeedInt num_elem, elem_size, num_comp; 675 676 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 677 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 678 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 679 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 680 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 681 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 682 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 683 684 return CEED_ERROR_SUCCESS; 685 } 686 687 /** 688 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 689 690 @param[in] op_fine Fine grid operator 691 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 692 @param[in] rstr_coarse Coarse grid restriction 693 @param[in] basis_coarse Coarse grid active vector basis 694 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 695 @param[out] op_coarse Coarse grid operator 696 @param[out] op_prolong Coarse to fine operator, or NULL 697 @param[out] op_restrict Fine to coarse operator, or NULL 698 699 @return An error code: 0 - success, otherwise - failure 700 701 @ref Developer 702 **/ 703 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 704 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 705 Ceed ceed; 706 CeedVector mult_vec = NULL; 707 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 708 709 // Check for composite operator 710 bool is_composite; 711 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 712 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 713 714 // Coarse Grid 715 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 716 CeedElemRestriction rstr_fine = NULL; 717 // -- Clone input fields 718 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 719 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 720 rstr_fine = op_fine->input_fields[i]->elem_rstr; 721 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 722 } else { 723 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 724 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 725 } 726 } 727 // -- Clone output fields 728 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 729 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 730 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 731 } else { 732 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 733 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 734 } 735 } 736 // -- Clone QFunctionAssemblyData 737 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 738 739 // Multiplicity vector 740 if (op_restrict || op_prolong) { 741 CeedVector mult_e_vec; 742 743 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 744 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 745 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 746 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 747 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 748 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 749 CeedCall(CeedVectorDestroy(&mult_e_vec)); 750 CeedCall(CeedVectorReciprocal(mult_vec)); 751 } 752 753 // Clone name 754 bool has_name = op_fine->name; 755 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 756 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 757 758 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 759 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 760 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 761 762 // Restriction/Prolongation Operators 763 CeedInt num_comp; 764 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 765 766 // Restriction 767 if (op_restrict) { 768 CeedInt *num_comp_r_data; 769 CeedQFunction qf_restrict; 770 CeedQFunctionContext ctx_r; 771 772 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 773 CeedCall(CeedCalloc(1, &num_comp_r_data)); 774 num_comp_r_data[0] = num_comp; 775 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 776 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 777 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 778 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 779 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 780 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 781 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 782 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 783 784 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 785 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 786 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 787 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 788 789 // Set name 790 char *restriction_name; 791 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 792 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 793 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 794 CeedCall(CeedFree(&restriction_name)); 795 796 // Check 797 CeedCall(CeedOperatorCheckReady(*op_restrict)); 798 799 // Cleanup 800 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 801 } 802 803 // Prolongation 804 if (op_prolong) { 805 CeedInt *num_comp_p_data; 806 CeedQFunction qf_prolong; 807 CeedQFunctionContext ctx_p; 808 809 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 810 CeedCall(CeedCalloc(1, &num_comp_p_data)); 811 num_comp_p_data[0] = num_comp; 812 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 813 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 814 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 815 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 816 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 817 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 818 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 819 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 820 821 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 822 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 823 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 824 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 825 826 // Set name 827 char *prolongation_name; 828 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 829 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 830 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 831 CeedCall(CeedFree(&prolongation_name)); 832 833 // Check 834 CeedCall(CeedOperatorCheckReady(*op_prolong)); 835 836 // Cleanup 837 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 838 } 839 840 // Check 841 CeedCall(CeedOperatorCheckReady(*op_coarse)); 842 843 // Cleanup 844 CeedCall(CeedVectorDestroy(&mult_vec)); 845 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 846 847 return CEED_ERROR_SUCCESS; 848 } 849 850 /** 851 @brief Build 1D mass matrix and Laplacian with perturbation 852 853 @param[in] interp_1d Interpolation matrix in one dimension 854 @param[in] grad_1d Gradient matrix in one dimension 855 @param[in] q_weight_1d Quadrature weights in one dimension 856 @param[in] P_1d Number of basis nodes in one dimension 857 @param[in] Q_1d Number of quadrature points in one dimension 858 @param[in] dim Dimension of basis 859 @param[out] mass Assembled mass matrix in one dimension 860 @param[out] laplace Assembled perturbed Laplacian in one dimension 861 862 @return An error code: 0 - success, otherwise - failure 863 864 @ref Developer 865 **/ 866 CeedPragmaOptimizeOff 867 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 868 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 869 for (CeedInt i = 0; i < P_1d; i++) { 870 for (CeedInt j = 0; j < P_1d; j++) { 871 CeedScalar sum = 0.0; 872 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 873 mass[i + j * P_1d] = sum; 874 } 875 } 876 // -- Laplacian 877 for (CeedInt i = 0; i < P_1d; i++) { 878 for (CeedInt j = 0; j < P_1d; j++) { 879 CeedScalar sum = 0.0; 880 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 881 laplace[i + j * P_1d] = sum; 882 } 883 } 884 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 885 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 886 return CEED_ERROR_SUCCESS; 887 } 888 CeedPragmaOptimizeOn 889 890 /// @} 891 892 /// ---------------------------------------------------------------------------- 893 /// CeedOperator Backend API 894 /// ---------------------------------------------------------------------------- 895 /// @addtogroup CeedOperatorBackend 896 /// @{ 897 898 /** 899 @brief Create object holding CeedQFunction assembly data for CeedOperator 900 901 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 902 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 903 904 @return An error code: 0 - success, otherwise - failure 905 906 @ref Backend 907 **/ 908 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 909 CeedCall(CeedCalloc(1, data)); 910 (*data)->ref_count = 1; 911 (*data)->ceed = ceed; 912 CeedCall(CeedReference(ceed)); 913 914 return CEED_ERROR_SUCCESS; 915 } 916 917 /** 918 @brief Increment the reference counter for a CeedQFunctionAssemblyData 919 920 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 921 922 @return An error code: 0 - success, otherwise - failure 923 924 @ref Backend 925 **/ 926 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 927 data->ref_count++; 928 return CEED_ERROR_SUCCESS; 929 } 930 931 /** 932 @brief Set re-use of CeedQFunctionAssemblyData 933 934 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 935 @param[in] reuse_data Boolean flag indicating data re-use 936 937 @return An error code: 0 - success, otherwise - failure 938 939 @ref Backend 940 **/ 941 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 942 data->reuse_data = reuse_data; 943 data->needs_data_update = true; 944 return CEED_ERROR_SUCCESS; 945 } 946 947 /** 948 @brief Mark QFunctionAssemblyData as stale 949 950 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 951 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 952 953 @return An error code: 0 - success, otherwise - failure 954 955 @ref Backend 956 **/ 957 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 958 data->needs_data_update = needs_data_update; 959 return CEED_ERROR_SUCCESS; 960 } 961 962 /** 963 @brief Determine if QFunctionAssemblyData needs update 964 965 @param[in] data CeedQFunctionAssemblyData to mark as stale 966 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 967 968 @return An error code: 0 - success, otherwise - failure 969 970 @ref Backend 971 **/ 972 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 973 *is_update_needed = !data->reuse_data || data->needs_data_update; 974 return CEED_ERROR_SUCCESS; 975 } 976 977 /** 978 @brief Copy the pointer to a CeedQFunctionAssemblyData. 979 980 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 981 982 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 983 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 984 CeedQFunctionAssemblyData. 985 986 @param[in] data CeedQFunctionAssemblyData to copy reference to 987 @param[in,out] data_copy Variable to store copied reference 988 989 @return An error code: 0 - success, otherwise - failure 990 991 @ref Backend 992 **/ 993 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 994 CeedCall(CeedQFunctionAssemblyDataReference(data)); 995 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 996 *data_copy = data; 997 return CEED_ERROR_SUCCESS; 998 } 999 1000 /** 1001 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1002 1003 @param[in] data CeedQFunctionAssemblyData to retrieve status 1004 @param[out] is_setup Boolean flag for setup status 1005 1006 @return An error code: 0 - success, otherwise - failure 1007 1008 @ref Backend 1009 **/ 1010 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1011 *is_setup = data->is_setup; 1012 return CEED_ERROR_SUCCESS; 1013 } 1014 1015 /** 1016 @brief Set internal objects for CeedQFunctionAssemblyData 1017 1018 @param[in,out] data CeedQFunctionAssemblyData to set objects 1019 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1020 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1021 1022 @return An error code: 0 - success, otherwise - failure 1023 1024 @ref Backend 1025 **/ 1026 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1027 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1028 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1029 1030 data->is_setup = true; 1031 return CEED_ERROR_SUCCESS; 1032 } 1033 1034 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1035 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1036 1037 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1038 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1039 1040 return CEED_ERROR_SUCCESS; 1041 } 1042 1043 /** 1044 @brief Destroy CeedQFunctionAssemblyData 1045 1046 @param[in,out] data CeedQFunctionAssemblyData to destroy 1047 1048 @return An error code: 0 - success, otherwise - failure 1049 1050 @ref Backend 1051 **/ 1052 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1053 if (!*data || --(*data)->ref_count > 0) { 1054 *data = NULL; 1055 return CEED_ERROR_SUCCESS; 1056 } 1057 CeedCall(CeedDestroy(&(*data)->ceed)); 1058 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1059 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1060 1061 CeedCall(CeedFree(data)); 1062 return CEED_ERROR_SUCCESS; 1063 } 1064 1065 /** 1066 @brief Get CeedOperatorAssemblyData 1067 1068 @param[in] op CeedOperator to assemble 1069 @param[out] data CeedQFunctionAssemblyData 1070 1071 @return An error code: 0 - success, otherwise - failure 1072 1073 @ref Backend 1074 **/ 1075 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1076 if (!op->op_assembled) { 1077 CeedOperatorAssemblyData data; 1078 1079 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1080 op->op_assembled = data; 1081 } 1082 *data = op->op_assembled; 1083 1084 return CEED_ERROR_SUCCESS; 1085 } 1086 1087 /** 1088 @brief Create object holding CeedOperator assembly data. 1089 1090 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1091 An array with references to the corresponding active CeedElemRestrictions is also stored. 1092 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1093 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1094 CeedEvalMode. 1095 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1096 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1097 1098 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1099 @param[in] op CeedOperator to be assembled 1100 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1101 1102 @return An error code: 0 - success, otherwise - failure 1103 1104 @ref Backend 1105 **/ 1106 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1107 CeedInt num_active_bases = 0; 1108 1109 // Allocate 1110 CeedCall(CeedCalloc(1, data)); 1111 (*data)->ceed = ceed; 1112 CeedCall(CeedReference(ceed)); 1113 1114 // Build OperatorAssembly data 1115 CeedQFunction qf; 1116 CeedQFunctionField *qf_fields; 1117 CeedOperatorField *op_fields; 1118 CeedInt num_input_fields; 1119 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1120 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1121 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1122 1123 // Determine active input basis 1124 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1125 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1126 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1127 for (CeedInt i = 0; i < num_input_fields; i++) { 1128 CeedVector vec; 1129 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1130 if (vec == CEED_VECTOR_ACTIVE) { 1131 CeedBasis basis_in = NULL; 1132 CeedEvalMode eval_mode; 1133 CeedInt index = -1, dim, num_comp, q_comp; 1134 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1135 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1136 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1137 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1138 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1139 for (CeedInt i = 0; i < num_active_bases; i++) { 1140 if ((*data)->active_bases[i] == basis_in) index = i; 1141 } 1142 if (index == -1) { 1143 CeedElemRestriction elem_rstr_in; 1144 index = num_active_bases; 1145 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1146 (*data)->active_bases[num_active_bases] = NULL; 1147 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1148 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1149 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1150 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1151 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1152 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1153 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1154 num_eval_modes_in[index] = 0; 1155 num_eval_modes_out[index] = 0; 1156 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1157 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1158 eval_modes_in[index] = NULL; 1159 eval_modes_out[index] = NULL; 1160 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1161 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1162 eval_mode_offsets_in[index] = NULL; 1163 eval_mode_offsets_out[index] = NULL; 1164 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1165 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1166 (*data)->assembled_bases_in[index] = NULL; 1167 (*data)->assembled_bases_out[index] = NULL; 1168 num_active_bases++; 1169 } 1170 if (eval_mode != CEED_EVAL_WEIGHT) { 1171 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1172 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1173 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1174 for (CeedInt d = 0; d < q_comp; d++) { 1175 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1176 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1177 offset += num_comp; 1178 } 1179 num_eval_modes_in[index] += q_comp; 1180 } 1181 } 1182 } 1183 (*data)->num_eval_modes_in = num_eval_modes_in; 1184 (*data)->eval_modes_in = eval_modes_in; 1185 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1186 1187 // Determine active output basis 1188 CeedInt num_output_fields; 1189 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1190 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1191 offset = 0; 1192 for (CeedInt i = 0; i < num_output_fields; i++) { 1193 CeedVector vec; 1194 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1195 if (vec == CEED_VECTOR_ACTIVE) { 1196 CeedBasis basis_out = NULL; 1197 CeedEvalMode eval_mode; 1198 CeedInt index = -1, dim, num_comp, q_comp; 1199 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1200 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1201 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1202 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1203 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1204 for (CeedInt i = 0; i < num_active_bases; i++) { 1205 if ((*data)->active_bases[i] == basis_out) index = i; 1206 } 1207 if (index == -1) { 1208 CeedElemRestriction elem_rstr_out; 1209 1210 index = num_active_bases; 1211 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1212 (*data)->active_bases[num_active_bases] = NULL; 1213 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1214 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1215 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1216 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1217 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1218 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1219 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1220 num_eval_modes_in[index] = 0; 1221 num_eval_modes_out[index] = 0; 1222 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1223 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1224 eval_modes_in[index] = NULL; 1225 eval_modes_out[index] = NULL; 1226 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1227 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1228 eval_mode_offsets_in[index] = NULL; 1229 eval_mode_offsets_out[index] = NULL; 1230 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1231 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1232 (*data)->assembled_bases_in[index] = NULL; 1233 (*data)->assembled_bases_out[index] = NULL; 1234 num_active_bases++; 1235 } 1236 if (eval_mode != CEED_EVAL_WEIGHT) { 1237 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1238 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1239 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1240 for (CeedInt d = 0; d < q_comp; d++) { 1241 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1242 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1243 offset += num_comp; 1244 } 1245 num_eval_modes_out[index] += q_comp; 1246 } 1247 } 1248 } 1249 (*data)->num_output_components = offset; 1250 (*data)->num_eval_modes_out = num_eval_modes_out; 1251 (*data)->eval_modes_out = eval_modes_out; 1252 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1253 (*data)->num_active_bases = num_active_bases; 1254 1255 return CEED_ERROR_SUCCESS; 1256 } 1257 1258 /** 1259 @brief Get CeedOperator CeedEvalModes for assembly. 1260 1261 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1262 1263 @param[in] data CeedOperatorAssemblyData 1264 @param[out] num_active_bases Total number of active bases 1265 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1266 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1267 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1268 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1269 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1270 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1271 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1272 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1273 including contributions of all active bases 1274 1275 @return An error code: 0 - success, otherwise - failure 1276 1277 1278 @ref Backend 1279 **/ 1280 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1281 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1282 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1283 if (num_active_bases) *num_active_bases = data->num_active_bases; 1284 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1285 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1286 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1287 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1288 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1289 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1290 if (num_output_components) *num_output_components = data->num_output_components; 1291 1292 return CEED_ERROR_SUCCESS; 1293 } 1294 1295 /** 1296 @brief Get CeedOperator CeedBasis data for assembly. 1297 1298 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1299 1300 @param[in] data CeedOperatorAssemblyData 1301 @param[out] num_active_bases Number of active bases, or NULL 1302 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1303 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1304 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1305 1306 @return An error code: 0 - success, otherwise - failure 1307 1308 @ref Backend 1309 **/ 1310 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1311 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1312 // Assemble B_in, B_out if needed 1313 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1314 CeedInt num_qpts; 1315 1316 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1317 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1318 CeedInt num_nodes; 1319 CeedScalar *B_in = NULL, *identity = NULL; 1320 bool has_eval_none = false; 1321 1322 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1323 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1324 1325 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1326 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1327 } 1328 if (has_eval_none) { 1329 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1330 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1331 identity[i * num_nodes + i] = 1.0; 1332 } 1333 } 1334 1335 for (CeedInt q = 0; q < num_qpts; q++) { 1336 for (CeedInt n = 0; n < num_nodes; n++) { 1337 CeedInt d_in = 0, q_comp_in; 1338 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1339 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1340 const CeedInt qq = data->num_eval_modes_in[b] * q; 1341 const CeedScalar *B = NULL; 1342 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1343 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1344 if (q_comp_in > 1) { 1345 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1346 else B = &B[(++d_in) * num_qpts * num_nodes]; 1347 } 1348 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1349 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1350 } 1351 } 1352 } 1353 if (identity) CeedCall(CeedFree(identity)); 1354 data->assembled_bases_in[b] = B_in; 1355 } 1356 } 1357 1358 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1359 CeedInt num_qpts; 1360 1361 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1362 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1363 CeedInt num_nodes; 1364 bool has_eval_none = false; 1365 CeedScalar *B_out = NULL, *identity = NULL; 1366 1367 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1368 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1369 1370 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1371 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1372 } 1373 if (has_eval_none) { 1374 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1375 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1376 identity[i * num_nodes + i] = 1.0; 1377 } 1378 } 1379 1380 for (CeedInt q = 0; q < num_qpts; q++) { 1381 for (CeedInt n = 0; n < num_nodes; n++) { 1382 CeedInt d_out = 0, q_comp_out; 1383 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1384 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1385 const CeedInt qq = data->num_eval_modes_out[b] * q; 1386 const CeedScalar *B = NULL; 1387 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1388 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1389 if (q_comp_out > 1) { 1390 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1391 else B = &B[(++d_out) * num_qpts * num_nodes]; 1392 } 1393 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1394 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1395 } 1396 } 1397 } 1398 if (identity) CeedCall(CeedFree(identity)); 1399 data->assembled_bases_out[b] = B_out; 1400 } 1401 } 1402 1403 // Pass out assembled data 1404 if (active_bases) *active_bases = data->active_bases; 1405 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1406 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1407 1408 return CEED_ERROR_SUCCESS; 1409 } 1410 1411 /** 1412 @brief Get CeedOperator CeedBasis data for assembly. 1413 1414 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1415 1416 @param[in] data CeedOperatorAssemblyData 1417 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1418 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1419 1420 @return An error code: 0 - success, otherwise - failure 1421 1422 @ref Backend 1423 **/ 1424 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1425 CeedElemRestriction **active_elem_rstrs) { 1426 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1427 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1428 1429 return CEED_ERROR_SUCCESS; 1430 } 1431 1432 /** 1433 @brief Destroy CeedOperatorAssemblyData 1434 1435 @param[in,out] data CeedOperatorAssemblyData to destroy 1436 1437 @return An error code: 0 - success, otherwise - failure 1438 1439 @ref Backend 1440 **/ 1441 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1442 if (!*data) { 1443 *data = NULL; 1444 return CEED_ERROR_SUCCESS; 1445 } 1446 CeedCall(CeedDestroy(&(*data)->ceed)); 1447 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1448 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1449 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1450 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1451 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1452 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1453 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1454 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1455 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1456 } 1457 CeedCall(CeedFree(&(*data)->active_bases)); 1458 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1459 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1460 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1461 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1462 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1463 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1464 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1465 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1466 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1467 1468 CeedCall(CeedFree(data)); 1469 return CEED_ERROR_SUCCESS; 1470 } 1471 1472 /// @} 1473 1474 /// ---------------------------------------------------------------------------- 1475 /// CeedOperator Public API 1476 /// ---------------------------------------------------------------------------- 1477 /// @addtogroup CeedOperatorUser 1478 /// @{ 1479 1480 /** 1481 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1482 1483 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1484 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1485 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1486 1487 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1488 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1489 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1490 1491 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1492 1493 @param[in] op CeedOperator to assemble CeedQFunction 1494 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1495 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1496 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1497 1498 @return An error code: 0 - success, otherwise - failure 1499 1500 @ref User 1501 **/ 1502 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1503 CeedCall(CeedOperatorCheckReady(op)); 1504 1505 if (op->LinearAssembleQFunction) { 1506 // Backend version 1507 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1508 } else { 1509 // Operator fallback 1510 CeedOperator op_fallback; 1511 1512 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1513 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1514 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1515 } 1516 return CEED_ERROR_SUCCESS; 1517 } 1518 1519 /** 1520 @brief Assemble CeedQFunction and store result internally. 1521 1522 Return copied references of stored data to the caller. 1523 Caller is responsible for ownership and destruction of the copied references. 1524 See also @ref CeedOperatorLinearAssembleQFunction 1525 1526 @param[in] op CeedOperator to assemble CeedQFunction 1527 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1528 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1529 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1530 1531 @return An error code: 0 - success, otherwise - failure 1532 1533 @ref User 1534 **/ 1535 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1536 CeedCall(CeedOperatorCheckReady(op)); 1537 1538 if (op->LinearAssembleQFunctionUpdate) { 1539 // Backend version 1540 bool qf_assembled_is_setup; 1541 CeedVector assembled_vec = NULL; 1542 CeedElemRestriction assembled_rstr = NULL; 1543 1544 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1545 if (qf_assembled_is_setup) { 1546 bool update_needed; 1547 1548 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1549 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1550 if (update_needed) { 1551 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1552 } 1553 } else { 1554 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1555 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1556 } 1557 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1558 1559 // Copy reference from internally held copy 1560 *assembled = NULL; 1561 *rstr = NULL; 1562 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1563 CeedCall(CeedVectorDestroy(&assembled_vec)); 1564 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1565 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1566 } else { 1567 // Operator fallback 1568 CeedOperator op_fallback; 1569 1570 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1571 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1572 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1573 } 1574 1575 return CEED_ERROR_SUCCESS; 1576 } 1577 1578 /** 1579 @brief Assemble the diagonal of a square linear CeedOperator 1580 1581 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1582 1583 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1584 1585 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1586 1587 @param[in] op CeedOperator to assemble CeedQFunction 1588 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1589 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1590 1591 @return An error code: 0 - success, otherwise - failure 1592 1593 @ref User 1594 **/ 1595 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1596 bool is_composite; 1597 CeedCall(CeedOperatorCheckReady(op)); 1598 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1599 1600 CeedSize input_size = 0, output_size = 0; 1601 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1602 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1603 1604 // Early exit for empty operator 1605 if (!is_composite) { 1606 CeedInt num_elem = 0; 1607 1608 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1609 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1610 } 1611 1612 if (op->LinearAssembleDiagonal) { 1613 // Backend version 1614 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1615 return CEED_ERROR_SUCCESS; 1616 } else if (op->LinearAssembleAddDiagonal) { 1617 // Backend version with zeroing first 1618 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1619 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1620 return CEED_ERROR_SUCCESS; 1621 } else { 1622 // Operator fallback 1623 CeedOperator op_fallback; 1624 1625 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1626 if (op_fallback) { 1627 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1628 return CEED_ERROR_SUCCESS; 1629 } 1630 } 1631 // Default interface implementation 1632 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1633 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1634 1635 return CEED_ERROR_SUCCESS; 1636 } 1637 1638 /** 1639 @brief Assemble the diagonal of a square linear CeedOperator 1640 1641 This sums into a CeedVector the diagonal of a linear CeedOperator. 1642 1643 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1644 1645 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1646 1647 @param[in] op CeedOperator to assemble CeedQFunction 1648 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1649 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1650 1651 @return An error code: 0 - success, otherwise - failure 1652 1653 @ref User 1654 **/ 1655 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1656 bool is_composite; 1657 CeedCall(CeedOperatorCheckReady(op)); 1658 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1659 1660 CeedSize input_size = 0, output_size = 0; 1661 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1662 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1663 1664 // Early exit for empty operator 1665 if (!is_composite) { 1666 CeedInt num_elem = 0; 1667 1668 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1669 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1670 } 1671 1672 if (op->LinearAssembleAddDiagonal) { 1673 // Backend version 1674 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1675 return CEED_ERROR_SUCCESS; 1676 } else { 1677 // Operator fallback 1678 CeedOperator op_fallback; 1679 1680 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1681 if (op_fallback) { 1682 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1683 return CEED_ERROR_SUCCESS; 1684 } 1685 } 1686 // Default interface implementation 1687 if (is_composite) { 1688 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1689 } else { 1690 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1691 } 1692 1693 return CEED_ERROR_SUCCESS; 1694 } 1695 1696 /** 1697 @brief Assemble the point block diagonal of a square linear CeedOperator 1698 1699 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1700 1701 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1702 1703 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1704 1705 @param[in] op CeedOperator to assemble CeedQFunction 1706 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1707 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1708 component in]. 1709 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1710 1711 @return An error code: 0 - success, otherwise - failure 1712 1713 @ref User 1714 **/ 1715 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1716 bool is_composite; 1717 CeedCall(CeedOperatorCheckReady(op)); 1718 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1719 1720 CeedSize input_size = 0, output_size = 0; 1721 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1722 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1723 1724 // Early exit for empty operator 1725 if (!is_composite) { 1726 CeedInt num_elem = 0; 1727 1728 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1729 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1730 } 1731 1732 if (op->LinearAssemblePointBlockDiagonal) { 1733 // Backend version 1734 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1735 return CEED_ERROR_SUCCESS; 1736 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1737 // Backend version with zeroing first 1738 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1739 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1740 return CEED_ERROR_SUCCESS; 1741 } else { 1742 // Operator fallback 1743 CeedOperator op_fallback; 1744 1745 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1746 if (op_fallback) { 1747 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1748 return CEED_ERROR_SUCCESS; 1749 } 1750 } 1751 // Default interface implementation 1752 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1753 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1754 1755 return CEED_ERROR_SUCCESS; 1756 } 1757 1758 /** 1759 @brief Assemble the point block diagonal of a square linear CeedOperator 1760 1761 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1762 1763 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1764 1765 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1766 1767 @param[in] op CeedOperator to assemble CeedQFunction 1768 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1769 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1770 component in]. 1771 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1772 1773 @return An error code: 0 - success, otherwise - failure 1774 1775 @ref User 1776 **/ 1777 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1778 bool is_composite; 1779 CeedCall(CeedOperatorCheckReady(op)); 1780 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1781 1782 CeedSize input_size = 0, output_size = 0; 1783 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1784 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1785 1786 // Early exit for empty operator 1787 if (!is_composite) { 1788 CeedInt num_elem = 0; 1789 1790 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1791 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1792 } 1793 1794 if (op->LinearAssembleAddPointBlockDiagonal) { 1795 // Backend version 1796 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1797 return CEED_ERROR_SUCCESS; 1798 } else { 1799 // Operator fallback 1800 CeedOperator op_fallback; 1801 1802 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1803 if (op_fallback) { 1804 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1805 return CEED_ERROR_SUCCESS; 1806 } 1807 } 1808 // Default interface implementation 1809 if (is_composite) { 1810 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1811 } else { 1812 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1813 } 1814 1815 return CEED_ERROR_SUCCESS; 1816 } 1817 1818 /** 1819 @brief Fully assemble the nonzero pattern of a linear operator. 1820 1821 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1822 1823 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1824 matrix in entry (i, j). 1825 Note that the (i, j) pairs are not unique and may repeat. 1826 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1827 1828 This will generally be slow unless your operator is low-order. 1829 1830 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1831 1832 @param[in] op CeedOperator to assemble 1833 @param[out] num_entries Number of entries in coordinate nonzero pattern 1834 @param[out] rows Row number for each entry 1835 @param[out] cols Column number for each entry 1836 1837 @ref User 1838 **/ 1839 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1840 CeedInt num_suboperators, single_entries; 1841 CeedOperator *sub_operators; 1842 bool is_composite; 1843 CeedCall(CeedOperatorCheckReady(op)); 1844 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1845 1846 if (op->LinearAssembleSymbolic) { 1847 // Backend version 1848 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1849 return CEED_ERROR_SUCCESS; 1850 } else { 1851 // Operator fallback 1852 CeedOperator op_fallback; 1853 1854 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1855 if (op_fallback) { 1856 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1857 return CEED_ERROR_SUCCESS; 1858 } 1859 } 1860 1861 // Default interface implementation 1862 1863 // count entries and allocate rows, cols arrays 1864 *num_entries = 0; 1865 if (is_composite) { 1866 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1867 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1868 for (CeedInt k = 0; k < num_suboperators; ++k) { 1869 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1870 *num_entries += single_entries; 1871 } 1872 } else { 1873 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1874 *num_entries += single_entries; 1875 } 1876 CeedCall(CeedCalloc(*num_entries, rows)); 1877 CeedCall(CeedCalloc(*num_entries, cols)); 1878 1879 // assemble nonzero locations 1880 CeedInt offset = 0; 1881 if (is_composite) { 1882 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1883 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1884 for (CeedInt k = 0; k < num_suboperators; ++k) { 1885 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1886 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1887 offset += single_entries; 1888 } 1889 } else { 1890 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1891 } 1892 1893 return CEED_ERROR_SUCCESS; 1894 } 1895 1896 /** 1897 @brief Fully assemble the nonzero entries of a linear operator. 1898 1899 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1900 1901 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1902 matrix in entry (i, j). 1903 Note that the (i, j) pairs are not unique and may repeat. 1904 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1905 1906 This will generally be slow unless your operator is low-order. 1907 1908 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1909 1910 @param[in] op CeedOperator to assemble 1911 @param[out] values Values to assemble into matrix 1912 1913 @ref User 1914 **/ 1915 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1916 CeedInt num_suboperators, single_entries = 0; 1917 CeedOperator *sub_operators; 1918 bool is_composite; 1919 CeedCall(CeedOperatorCheckReady(op)); 1920 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1921 1922 // Early exit for empty operator 1923 if (!is_composite) { 1924 CeedInt num_elem = 0; 1925 1926 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1927 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1928 } 1929 1930 if (op->LinearAssemble) { 1931 // Backend version 1932 CeedCall(op->LinearAssemble(op, values)); 1933 return CEED_ERROR_SUCCESS; 1934 } else { 1935 // Operator fallback 1936 CeedOperator op_fallback; 1937 1938 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1939 if (op_fallback) { 1940 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 1941 return CEED_ERROR_SUCCESS; 1942 } 1943 } 1944 1945 // Default interface implementation 1946 CeedInt offset = 0; 1947 CeedCall(CeedVectorSetValue(values, 0.0)); 1948 if (is_composite) { 1949 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1950 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1951 for (CeedInt k = 0; k < num_suboperators; k++) { 1952 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 1953 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1954 offset += single_entries; 1955 } 1956 } else { 1957 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 1958 } 1959 1960 return CEED_ERROR_SUCCESS; 1961 } 1962 1963 /** 1964 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 1965 1966 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1967 1968 @param[in] op Composite CeedOperator 1969 @param[in] num_skip_indices Number of suboperators to skip 1970 @param[in] skip_indices Array of indices of suboperators to skip 1971 @param[out] mult Vector to store multiplicity (of size l_size) 1972 1973 @return An error code: 0 - success, otherwise - failure 1974 1975 @ref User 1976 **/ 1977 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 1978 CeedCall(CeedOperatorCheckReady(op)); 1979 1980 Ceed ceed; 1981 CeedInt num_suboperators; 1982 CeedSize l_vec_len; 1983 CeedScalar *mult_array; 1984 CeedVector ones_l_vec; 1985 CeedElemRestriction elem_rstr; 1986 CeedOperator *sub_operators; 1987 1988 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1989 1990 // Zero mult vector 1991 CeedCall(CeedVectorSetValue(mult, 0.0)); 1992 1993 // Get suboperators 1994 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1995 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1996 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 1997 1998 // Work vector 1999 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2000 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2001 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2002 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2003 2004 // Compute multiplicity across suboperators 2005 for (CeedInt i = 0; i < num_suboperators; i++) { 2006 const CeedScalar *sub_mult_array; 2007 CeedVector sub_mult_l_vec, ones_e_vec; 2008 2009 // -- Check for suboperator to skip 2010 for (CeedInt j = 0; j < num_skip_indices; j++) { 2011 if (skip_indices[j] == i) continue; 2012 } 2013 2014 // -- Sub operator multiplicity 2015 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2016 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2017 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2018 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2019 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2020 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2021 // ---- Flag every node present in the current suboperator 2022 for (CeedInt j = 0; j < l_vec_len; j++) { 2023 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2024 } 2025 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2026 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2027 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2028 } 2029 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2030 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2031 2032 return CEED_ERROR_SUCCESS; 2033 } 2034 2035 /** 2036 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2037 grid interpolation 2038 2039 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2040 2041 @param[in] op_fine Fine grid operator 2042 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2043 @param[in] rstr_coarse Coarse grid restriction 2044 @param[in] basis_coarse Coarse grid active vector basis 2045 @param[out] op_coarse Coarse grid operator 2046 @param[out] op_prolong Coarse to fine operator, or NULL 2047 @param[out] op_restrict Fine to coarse operator, or NULL 2048 2049 @return An error code: 0 - success, otherwise - failure 2050 2051 @ref User 2052 **/ 2053 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2054 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2055 CeedCall(CeedOperatorCheckReady(op_fine)); 2056 2057 // Build prolongation matrix, if required 2058 CeedBasis basis_c_to_f = NULL; 2059 if (op_prolong || op_restrict) { 2060 CeedBasis basis_fine; 2061 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2062 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2063 } 2064 2065 // Core code 2066 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2067 2068 return CEED_ERROR_SUCCESS; 2069 } 2070 2071 /** 2072 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2073 2074 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2075 2076 @param[in] op_fine Fine grid operator 2077 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2078 @param[in] rstr_coarse Coarse grid restriction 2079 @param[in] basis_coarse Coarse grid active vector basis 2080 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2081 @param[out] op_coarse Coarse grid operator 2082 @param[out] op_prolong Coarse to fine operator, or NULL 2083 @param[out] op_restrict Fine to coarse operator, or NULL 2084 2085 @return An error code: 0 - success, otherwise - failure 2086 2087 @ref User 2088 **/ 2089 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2090 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2091 CeedOperator *op_restrict) { 2092 CeedCall(CeedOperatorCheckReady(op_fine)); 2093 Ceed ceed; 2094 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2095 2096 // Check for compatible quadrature spaces 2097 CeedBasis basis_fine; 2098 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2099 CeedInt Q_f, Q_c; 2100 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2101 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2102 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2103 2104 // Create coarse to fine basis, if required 2105 CeedBasis basis_c_to_f = NULL; 2106 if (op_prolong || op_restrict) { 2107 // Check if interpolation matrix is provided 2108 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2109 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2110 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2111 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2112 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2113 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2114 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2115 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2116 CeedScalar *q_ref, *q_weight, *grad; 2117 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2118 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2119 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2120 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2121 CeedCall(CeedFree(&q_ref)); 2122 CeedCall(CeedFree(&q_weight)); 2123 CeedCall(CeedFree(&grad)); 2124 } 2125 2126 // Core code 2127 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2128 return CEED_ERROR_SUCCESS; 2129 } 2130 2131 /** 2132 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2133 2134 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2135 2136 @param[in] op_fine Fine grid operator 2137 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2138 @param[in] rstr_coarse Coarse grid restriction 2139 @param[in] basis_coarse Coarse grid active vector basis 2140 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2141 @param[out] op_coarse Coarse grid operator 2142 @param[out] op_prolong Coarse to fine operator, or NULL 2143 @param[out] op_restrict Fine to coarse operator, or NULL 2144 2145 @return An error code: 0 - success, otherwise - failure 2146 2147 @ref User 2148 **/ 2149 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2150 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2151 CeedOperator *op_restrict) { 2152 CeedCall(CeedOperatorCheckReady(op_fine)); 2153 Ceed ceed; 2154 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2155 2156 // Check for compatible quadrature spaces 2157 CeedBasis basis_fine; 2158 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2159 CeedInt Q_f, Q_c; 2160 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2161 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2162 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2163 2164 // Coarse to fine basis 2165 CeedBasis basis_c_to_f = NULL; 2166 if (op_prolong || op_restrict) { 2167 // Check if interpolation matrix is provided 2168 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2169 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2170 CeedElemTopology topo; 2171 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2172 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2173 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2174 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2175 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2176 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2177 CeedScalar *q_ref, *q_weight, *grad; 2178 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2179 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2180 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2181 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2182 CeedCall(CeedFree(&q_ref)); 2183 CeedCall(CeedFree(&q_weight)); 2184 CeedCall(CeedFree(&grad)); 2185 } 2186 2187 // Core code 2188 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2189 return CEED_ERROR_SUCCESS; 2190 } 2191 2192 /** 2193 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2194 2195 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2196 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2197 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2198 \hat S V\f$. 2199 The CeedOperator must be linear and non-composite. 2200 The associated CeedQFunction must therefore also be linear. 2201 2202 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2203 2204 @param[in] op CeedOperator to create element inverses 2205 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2206 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2207 2208 @return An error code: 0 - success, otherwise - failure 2209 2210 @ref User 2211 **/ 2212 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2213 CeedCall(CeedOperatorCheckReady(op)); 2214 2215 if (op->CreateFDMElementInverse) { 2216 // Backend version 2217 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2218 return CEED_ERROR_SUCCESS; 2219 } else { 2220 // Operator fallback 2221 CeedOperator op_fallback; 2222 2223 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2224 if (op_fallback) { 2225 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2226 return CEED_ERROR_SUCCESS; 2227 } 2228 } 2229 2230 // Default interface implementation 2231 Ceed ceed, ceed_parent; 2232 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2233 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2234 ceed_parent = ceed_parent ? ceed_parent : ceed; 2235 CeedQFunction qf; 2236 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2237 2238 // Determine active input basis 2239 bool interp = false, grad = false; 2240 CeedBasis basis = NULL; 2241 CeedElemRestriction rstr = NULL; 2242 CeedOperatorField *op_fields; 2243 CeedQFunctionField *qf_fields; 2244 CeedInt num_input_fields; 2245 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2246 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2247 for (CeedInt i = 0; i < num_input_fields; i++) { 2248 CeedVector vec; 2249 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2250 if (vec == CEED_VECTOR_ACTIVE) { 2251 CeedEvalMode eval_mode; 2252 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2253 interp = interp || eval_mode == CEED_EVAL_INTERP; 2254 grad = grad || eval_mode == CEED_EVAL_GRAD; 2255 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2256 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2257 } 2258 } 2259 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2260 CeedSize l_size = 1; 2261 CeedInt P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2262 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2263 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2264 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2265 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2266 CeedCall(CeedBasisGetDimension(basis, &dim)); 2267 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2268 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2269 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2270 2271 // Build and diagonalize 1D Mass and Laplacian 2272 bool is_tensor_basis; 2273 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2274 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2275 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2276 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2277 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2278 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2279 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2280 CeedCall(CeedCalloc(P_1d, &lambda)); 2281 // -- Build matrices 2282 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2283 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2284 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2285 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2286 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2287 2288 // -- Diagonalize 2289 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2290 CeedCall(CeedFree(&mass)); 2291 CeedCall(CeedFree(&laplace)); 2292 for (CeedInt i = 0; i < P_1d; i++) { 2293 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2294 } 2295 CeedCall(CeedFree(&x)); 2296 2297 // Assemble QFunction 2298 CeedVector assembled; 2299 CeedElemRestriction rstr_qf; 2300 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2301 CeedInt layout[3]; 2302 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2303 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2304 CeedScalar max_norm = 0; 2305 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2306 2307 // Calculate element averages 2308 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2309 CeedScalar *elem_avg; 2310 const CeedScalar *assembled_array, *q_weight_array; 2311 CeedVector q_weight; 2312 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2313 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2314 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2315 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2316 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2317 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2318 for (CeedInt e = 0; e < num_elem; e++) { 2319 CeedInt count = 0; 2320 for (CeedInt q = 0; q < num_qpts; q++) { 2321 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2322 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2323 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2324 count++; 2325 } 2326 } 2327 } 2328 if (count) { 2329 elem_avg[e] /= count; 2330 } else { 2331 elem_avg[e] = 1.0; 2332 } 2333 } 2334 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2335 CeedCall(CeedVectorDestroy(&assembled)); 2336 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2337 CeedCall(CeedVectorDestroy(&q_weight)); 2338 2339 // Build FDM diagonal 2340 CeedVector q_data; 2341 CeedScalar *q_data_array, *fdm_diagonal; 2342 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2343 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2344 for (CeedInt c = 0; c < num_comp; c++) { 2345 for (CeedInt n = 0; n < num_nodes; n++) { 2346 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2347 if (grad) { 2348 for (CeedInt d = 0; d < dim; d++) { 2349 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2350 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2351 } 2352 } 2353 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2354 } 2355 } 2356 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2357 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2358 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2359 for (CeedInt e = 0; e < num_elem; e++) { 2360 for (CeedInt c = 0; c < num_comp; c++) { 2361 for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2362 } 2363 } 2364 CeedCall(CeedFree(&elem_avg)); 2365 CeedCall(CeedFree(&fdm_diagonal)); 2366 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2367 2368 // Setup FDM operator 2369 // -- Basis 2370 CeedBasis fdm_basis; 2371 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2372 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2373 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2374 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2375 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2376 CeedCall(CeedFree(&fdm_interp)); 2377 CeedCall(CeedFree(&grad_dummy)); 2378 CeedCall(CeedFree(&q_ref_dummy)); 2379 CeedCall(CeedFree(&q_weight_dummy)); 2380 CeedCall(CeedFree(&lambda)); 2381 2382 // -- Restriction 2383 CeedElemRestriction rstr_qd_i; 2384 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2385 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2386 // -- QFunction 2387 CeedQFunction qf_fdm; 2388 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2389 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2390 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2391 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2392 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2393 // -- QFunction context 2394 CeedInt *num_comp_data; 2395 CeedCall(CeedCalloc(1, &num_comp_data)); 2396 num_comp_data[0] = num_comp; 2397 CeedQFunctionContext ctx_fdm; 2398 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2399 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2400 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2401 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2402 // -- Operator 2403 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2404 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2405 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2406 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2407 2408 // Cleanup 2409 CeedCall(CeedVectorDestroy(&q_data)); 2410 CeedCall(CeedBasisDestroy(&fdm_basis)); 2411 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2412 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2413 2414 return CEED_ERROR_SUCCESS; 2415 } 2416 2417 /// @} 2418