1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 char *source_path_with_name = NULL; 39 40 // Check if NULL qf passed in 41 if (!qf) return CEED_ERROR_SUCCESS; 42 43 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 44 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 45 46 if (qf->source_path) { 47 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 48 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 49 memcpy(source_path_with_name, qf->source_path, path_len); 50 memcpy(&source_path_with_name[path_len], ":", 1); 51 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 52 } else { 53 CeedCall(CeedCalloc(1, &source_path_with_name)); 54 } 55 56 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 57 { 58 CeedQFunctionContext ctx; 59 60 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 61 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 62 } 63 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 64 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 65 } 66 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 67 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 68 } 69 CeedCall(CeedFree(&source_path_with_name)); 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 Ceed ceed_fallback; 84 bool is_composite; 85 CeedOperator op_fallback; 86 87 // Check not already created 88 if (op->op_fallback) return CEED_ERROR_SUCCESS; 89 90 // Fallback Ceed 91 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 92 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 93 94 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 95 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 96 97 // Clone Op 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 117 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 118 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 119 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 120 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 121 op->input_fields[i]->vec)); 122 } 123 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 124 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 125 op->output_fields[i]->vec)); 126 } 127 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 128 if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 // Cleanup 130 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 131 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 133 } 134 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 135 CeedCall(CeedOperatorCheckReady(op_fallback)); 136 // Note: No ref-counting here so we don't get caught in a reference loop. 137 // The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback. 138 op->op_fallback = op_fallback; 139 op_fallback->op_fallback_parent = op; 140 return CEED_ERROR_SUCCESS; 141 } 142 143 /** 144 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 145 146 @param[in] op CeedOperator to retrieve fallback for 147 @param[out] op_fallback Fallback CeedOperator 148 149 @return An error code: 0 - success, otherwise - failure 150 151 @ref Developer 152 **/ 153 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 154 // Create if needed 155 if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op)); 156 if (op->op_fallback) { 157 bool is_debug; 158 159 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 160 if (is_debug) { 161 Ceed ceed, ceed_fallback; 162 const char *resource, *resource_fallback; 163 164 CeedCall(CeedOperatorGetCeed(op, &ceed)); 165 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 166 CeedCall(CeedGetResource(ceed, &resource)); 167 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 168 169 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 170 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 171 op->op_fallback); 172 } 173 } 174 *op_fallback = op->op_fallback; 175 return CEED_ERROR_SUCCESS; 176 } 177 178 /** 179 @brief Get the parent Ceed context associated with a fallback Ceed context for a CeedOperator 180 181 @param[in] op CeedOperator context 182 @param[out] parent Variable to store parent CeedOperator context 183 184 @return An error code: 0 - success, otherwise - failure 185 186 @ref Developer 187 **/ 188 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) { 189 *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL; 190 return CEED_ERROR_SUCCESS; 191 } 192 193 /** 194 @brief Get the parent Ceed context associated with a fallback Ceed context for a CeedOperator 195 196 @param[in] op CeedOperator context 197 @param[out] parent Variable to store parent Ceed context 198 199 @return An error code: 0 - success, otherwise - failure 200 201 @ref Developer 202 **/ 203 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) { 204 *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : NULL; 205 return CEED_ERROR_SUCCESS; 206 } 207 208 /** 209 @brief Select correct basis matrix pointer based on CeedEvalMode 210 211 @param[in] basis CeedBasis from which to get the basis matrix 212 @param[in] eval_mode Current basis evaluation mode 213 @param[in] identity Pointer to identity matrix 214 @param[out] basis_ptr Basis pointer to set 215 216 @ref Developer 217 **/ 218 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 219 switch (eval_mode) { 220 case CEED_EVAL_NONE: 221 *basis_ptr = identity; 222 break; 223 case CEED_EVAL_INTERP: 224 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 225 break; 226 case CEED_EVAL_GRAD: 227 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 228 break; 229 case CEED_EVAL_DIV: 230 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 231 break; 232 case CEED_EVAL_CURL: 233 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 234 break; 235 case CEED_EVAL_WEIGHT: 236 break; // Caught by QF Assembly 237 } 238 assert(*basis_ptr != NULL); 239 return CEED_ERROR_SUCCESS; 240 } 241 242 /** 243 @brief Create point block restriction for active operator field 244 245 @param[in] rstr Original CeedElemRestriction for active field 246 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 247 248 @return An error code: 0 - success, otherwise - failure 249 250 @ref Developer 251 **/ 252 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 253 Ceed ceed; 254 CeedInt num_elem, num_comp, shift, elem_size, comp_stride, *pointblock_offsets; 255 CeedSize l_size; 256 const CeedInt *offsets; 257 258 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 259 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 260 261 // Expand offsets 262 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 263 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 264 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 265 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 266 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 267 shift = num_comp; 268 if (comp_stride != 1) shift *= num_comp; 269 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 270 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 271 pointblock_offsets[i] = offsets[i] * shift; 272 } 273 274 // Create new restriction 275 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 276 pointblock_offsets, pointblock_rstr)); 277 278 // Cleanup 279 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 280 return CEED_ERROR_SUCCESS; 281 } 282 283 /** 284 @brief Core logic for assembling operator diagonal or point block diagonal 285 286 @param[in] op CeedOperator to assemble point block diagonal 287 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 288 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 289 @param[out] assembled CeedVector to store assembled diagonal 290 291 @return An error code: 0 - success, otherwise - failure 292 293 @ref Developer 294 **/ 295 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 296 Ceed ceed; 297 CeedInt num_input_fields, num_output_fields; 298 CeedInt layout[3]; 299 const CeedScalar *assembled_qf_array; 300 CeedVector assembled_qf = NULL; 301 CeedElemRestriction assembled_elem_rstr = NULL; 302 CeedQFunction qf; 303 304 CeedCall(CeedOperatorGetCeed(op, &ceed)); 305 306 // Assemble QFunction 307 CeedCall(CeedOperatorGetQFunction(op, &qf)); 308 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 309 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 310 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 311 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 312 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 313 314 // Get assembly data 315 const CeedEvalMode **eval_modes_in, **eval_modes_out; 316 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 317 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 318 CeedElemRestriction *active_elem_rstrs; 319 CeedBasis *active_bases; 320 CeedOperatorAssemblyData data; 321 322 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 323 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 324 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 325 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 326 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 327 328 // Loop over all active bases 329 for (CeedInt b = 0; b < num_active_bases; b++) { 330 bool has_eval_none = false; 331 CeedInt num_elem, num_nodes, num_qpts, num_components; 332 CeedScalar *elem_diag_array, *identity = NULL; 333 CeedVector elem_diag; 334 CeedElemRestriction diag_elem_rstr; 335 336 // Assemble point block diagonal restriction, if needed 337 if (is_pointblock) { 338 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs[b], &diag_elem_rstr)); 339 } else { 340 CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs[b], &diag_elem_rstr)); 341 } 342 343 // Create diagonal vector 344 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 345 346 // Assemble element operator diagonals 347 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 348 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 349 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 350 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 351 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 352 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 353 354 // Construct identity matrix for basis if required 355 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 356 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 357 } 358 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 359 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 360 } 361 if (has_eval_none) { 362 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 363 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 364 } 365 366 // Compute the diagonal of B^T D B 367 // Each element 368 for (CeedSize e = 0; e < num_elem; e++) { 369 // Each basis eval mode pair 370 CeedInt d_out = 0, q_comp_out; 371 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 372 373 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 374 CeedInt d_in = 0, q_comp_in; 375 const CeedScalar *B_t = NULL; 376 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 377 378 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 379 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 380 if (q_comp_out > 1) { 381 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 382 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 383 } 384 eval_mode_out_prev = eval_modes_out[b][e_out]; 385 386 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 387 const CeedScalar *B = NULL; 388 389 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 390 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 391 if (q_comp_in > 1) { 392 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 393 else B = &B[(++d_in) * num_qpts * num_nodes]; 394 } 395 eval_mode_in_prev = eval_modes_in[b][e_in]; 396 397 // Each component 398 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 399 // Each qpt/node pair 400 for (CeedInt q = 0; q < num_qpts; q++) { 401 if (is_pointblock) { 402 // Point Block Diagonal 403 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 404 const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 405 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 406 407 for (CeedInt n = 0; n < num_nodes; n++) { 408 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 409 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 410 } 411 } 412 } else { 413 // Diagonal Only 414 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 415 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 416 417 for (CeedInt n = 0; n < num_nodes; n++) { 418 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 419 } 420 } 421 } 422 } 423 } 424 } 425 } 426 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 427 428 // Assemble local operator diagonal 429 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 430 431 // Cleanup 432 CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 433 CeedCall(CeedVectorDestroy(&elem_diag)); 434 CeedCall(CeedFree(&identity)); 435 } 436 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 437 CeedCall(CeedVectorDestroy(&assembled_qf)); 438 return CEED_ERROR_SUCCESS; 439 } 440 441 /** 442 @brief Core logic for assembling composite operator diagonal 443 444 @param[in] op CeedOperator to assemble point block diagonal 445 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 446 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 447 @param[out] assembled CeedVector to store assembled diagonal 448 449 @return An error code: 0 - success, otherwise - failure 450 451 @ref Developer 452 **/ 453 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 454 CeedVector assembled) { 455 CeedInt num_sub; 456 CeedOperator *suboperators; 457 458 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 459 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 460 for (CeedInt i = 0; i < num_sub; i++) { 461 if (is_pointblock) { 462 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 463 } else { 464 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 465 } 466 } 467 return CEED_ERROR_SUCCESS; 468 } 469 470 /** 471 @brief Build nonzero pattern for non-composite operator 472 473 Users should generally use CeedOperatorLinearAssembleSymbolic() 474 475 @param[in] op CeedOperator to assemble nonzero pattern 476 @param[in] offset Offset for number of entries 477 @param[out] rows Row number for each entry 478 @param[out] cols Column number for each entry 479 480 @return An error code: 0 - success, otherwise - failure 481 482 @ref Developer 483 **/ 484 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 485 Ceed ceed; 486 bool is_composite; 487 CeedInt num_elem, elem_size, num_comp, layout_er[3], local_num_entries; 488 CeedSize num_nodes, count = 0; 489 CeedScalar *array; 490 const CeedScalar *elem_dof_a; 491 CeedVector index_vec, elem_dof; 492 CeedElemRestriction active_rstr, index_elem_rstr; 493 494 CeedCall(CeedOperatorGetCeed(op, &ceed)); 495 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 496 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 497 498 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 499 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 500 CeedCall(CeedElemRestrictionCreateUnorientedCopy(active_rstr, &index_elem_rstr)); 501 CeedCall(CeedElemRestrictionGetNumElements(index_elem_rstr, &num_elem)); 502 CeedCall(CeedElemRestrictionGetElementSize(index_elem_rstr, &elem_size)); 503 CeedCall(CeedElemRestrictionGetNumComponents(index_elem_rstr, &num_comp)); 504 CeedCall(CeedElemRestrictionGetELayout(index_elem_rstr, &layout_er)); 505 local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 506 507 // Determine elem_dof relation 508 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 509 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 510 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 511 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 512 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 513 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 514 CeedCall(CeedElemRestrictionApply(index_elem_rstr, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 515 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 516 CeedCall(CeedVectorDestroy(&index_vec)); 517 518 // Determine i, j locations for element matrices 519 for (CeedInt e = 0; e < num_elem; e++) { 520 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 521 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 522 for (CeedInt i = 0; i < elem_size; i++) { 523 for (CeedInt j = 0; j < elem_size; j++) { 524 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 525 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 526 const CeedInt row = elem_dof_a[elem_dof_index_row]; 527 const CeedInt col = elem_dof_a[elem_dof_index_col]; 528 529 rows[offset + count] = row; 530 cols[offset + count] = col; 531 count++; 532 } 533 } 534 } 535 } 536 } 537 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 538 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 539 CeedCall(CeedVectorDestroy(&elem_dof)); 540 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr)); 541 return CEED_ERROR_SUCCESS; 542 } 543 544 /** 545 @brief Assemble nonzero entries for non-composite operator 546 547 Users should generally use CeedOperatorLinearAssemble() 548 549 @param[in] op CeedOperator to assemble 550 @param[in] offset Offset for number of entries 551 @param[out] values Values to assemble into matrix 552 553 @return An error code: 0 - success, otherwise - failure 554 555 @ref Developer 556 **/ 557 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 558 Ceed ceed; 559 bool is_composite; 560 561 CeedCall(CeedOperatorGetCeed(op, &ceed)); 562 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 563 564 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 565 566 // Early exit for empty operator 567 { 568 CeedInt num_elem = 0; 569 570 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 571 if (num_elem == 0) return CEED_ERROR_SUCCESS; 572 } 573 574 if (op->LinearAssembleSingle) { 575 // Backend version 576 CeedCall(op->LinearAssembleSingle(op, offset, values)); 577 return CEED_ERROR_SUCCESS; 578 } else { 579 // Operator fallback 580 CeedOperator op_fallback; 581 582 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 583 if (op_fallback) { 584 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 585 return CEED_ERROR_SUCCESS; 586 } 587 } 588 589 // Assemble QFunction 590 const bool *orients = NULL; 591 const CeedInt8 *curl_orients = NULL; 592 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases, num_input_fields, num_output_fields, num_elem, elem_size, num_qpts, num_comp, 593 local_num_entries, layout_qf[3]; 594 const CeedScalar *assembled_qf_array; 595 CeedVector assembled_qf = NULL; 596 CeedRestrictionType rstr_type; 597 CeedElemRestriction rstr_q = NULL, active_rstr; 598 const CeedEvalMode **eval_modes_in, **eval_modes_out; 599 CeedBasis *bases, basis_in; 600 CeedQFunction qf; 601 CeedOperatorAssemblyData data; 602 CeedOperatorField *input_fields, *output_fields; 603 604 CeedCall(CeedOperatorGetQFunction(op, &qf)); 605 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 606 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 607 608 // Get assembly data 609 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 610 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 611 &eval_modes_out, NULL, NULL)); 612 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 613 basis_in = bases[0]; 614 615 CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 616 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 617 618 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 619 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 620 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 621 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 622 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 623 local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 624 625 CeedCall(CeedElemRestrictionGetType(active_rstr, &rstr_type)); 626 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 627 CeedCall(CeedElemRestrictionGetOrientations(active_rstr, CEED_MEM_HOST, &orients)); 628 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 629 CeedCall(CeedElemRestrictionGetCurlOrientations(active_rstr, CEED_MEM_HOST, &curl_orients)); 630 } 631 632 // Loop over elements and put in data structure 633 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 634 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 635 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 636 637 // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 638 CeedSize count = 0; 639 CeedScalar *vals, BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]], elem_mat[elem_size * elem_size]; 640 const CeedScalar **B_mats_in, **B_mats_out; 641 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 642 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 643 644 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 645 for (CeedSize e = 0; e < num_elem; e++) { 646 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 647 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 648 // Compute B^T*D 649 for (CeedSize n = 0; n < elem_size; n++) { 650 for (CeedSize q = 0; q < num_qpts; q++) { 651 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 652 const CeedSize btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 653 CeedScalar sum = 0.0; 654 655 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 656 const CeedSize b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 657 const CeedSize eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 658 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 659 660 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 661 } 662 BTD_mat[btd_index] = sum; 663 } 664 } 665 } 666 667 // Form element matrix itself (for each block component) 668 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 669 670 // Transform the element matrix if required 671 if (orients) { 672 const bool *elem_orients = &orients[e * elem_size]; 673 674 for (CeedInt i = 0; i < elem_size; i++) { 675 for (CeedInt j = 0; j < elem_size; j++) { 676 elem_mat[i * elem_size + j] *= elem_orients[i] ? -1.0 : 1.0; 677 elem_mat[i * elem_size + j] *= elem_orients[j] ? -1.0 : 1.0; 678 } 679 } 680 } else if (curl_orients) { 681 const CeedInt8 *elem_curl_orients = &curl_orients[e * 3 * elem_size]; 682 CeedScalar o_elem_mat[elem_size * elem_size]; 683 684 // T^T*(B^T*D*B) 685 for (CeedInt i = 0; i < elem_size; i++) { 686 for (CeedInt j = 0; j < elem_size; j++) { 687 o_elem_mat[i * elem_size + j] = elem_mat[i * elem_size + j] * elem_curl_orients[3 * i + 1] + 688 (i > 0 ? elem_mat[(i - 1) * elem_size + j] * elem_curl_orients[3 * i - 1] : 0.0) + 689 (i < elem_size - 1 ? elem_mat[(i + 1) * elem_size + j] * elem_curl_orients[3 * i + 3] : 0.0); 690 } 691 } 692 // T^T*(B^T*D*B)*T 693 for (CeedInt i = 0; i < elem_size; i++) { 694 for (CeedInt j = 0; j < elem_size; j++) { 695 elem_mat[i * elem_size + j] = o_elem_mat[i * elem_size + j] * elem_curl_orients[3 * j + 1] + 696 (j > 0 ? o_elem_mat[i * elem_size + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) + 697 (j < elem_size - 1 ? o_elem_mat[i * elem_size + j + 1] * elem_curl_orients[3 * j + 3] : 0.0); 698 } 699 } 700 } 701 702 // Put element matrix in coordinate data structure 703 for (CeedInt i = 0; i < elem_size; i++) { 704 for (CeedInt j = 0; j < elem_size; j++) { 705 vals[offset + count] = elem_mat[i * elem_size + j]; 706 count++; 707 } 708 } 709 } 710 } 711 } 712 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 713 CeedCall(CeedVectorRestoreArray(values, &vals)); 714 715 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 716 CeedCall(CeedVectorDestroy(&assembled_qf)); 717 718 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 719 CeedCall(CeedElemRestrictionRestoreOrientations(active_rstr, &orients)); 720 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 721 CeedCall(CeedElemRestrictionRestoreCurlOrientations(active_rstr, &curl_orients)); 722 } 723 return CEED_ERROR_SUCCESS; 724 } 725 726 /** 727 @brief Count number of entries for assembled CeedOperator 728 729 @param[in] op CeedOperator to assemble 730 @param[out] num_entries Number of entries in assembled representation 731 732 @return An error code: 0 - success, otherwise - failure 733 734 @ref Utility 735 **/ 736 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 737 bool is_composite; 738 CeedInt num_elem, elem_size, num_comp; 739 CeedElemRestriction rstr; 740 741 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 742 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 743 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 744 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 745 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 746 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 747 *num_entries = (CeedSize)elem_size * num_comp * elem_size * num_comp * num_elem; 748 return CEED_ERROR_SUCCESS; 749 } 750 751 /** 752 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 753 754 @param[in] op_fine Fine grid operator 755 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 756 @param[in] rstr_coarse Coarse grid restriction 757 @param[in] basis_coarse Coarse grid active vector basis 758 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 759 @param[out] op_coarse Coarse grid operator 760 @param[out] op_prolong Coarse to fine operator, or NULL 761 @param[out] op_restrict Fine to coarse operator, or NULL 762 763 @return An error code: 0 - success, otherwise - failure 764 765 @ref Developer 766 **/ 767 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 768 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 769 bool is_composite; 770 Ceed ceed; 771 CeedInt num_comp; 772 CeedVector mult_vec = NULL; 773 CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL; 774 775 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 776 777 // Check for composite operator 778 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 779 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 780 781 // Coarse Grid 782 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 783 // -- Clone input fields 784 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 785 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 786 rstr_fine = op_fine->input_fields[i]->elem_rstr; 787 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 788 } else { 789 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 790 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 791 } 792 } 793 // -- Clone output fields 794 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 795 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 796 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 797 } else { 798 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 799 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 800 } 801 } 802 // -- Clone QFunctionAssemblyData 803 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 804 805 // Multiplicity vector 806 if (op_restrict || op_prolong) { 807 CeedVector mult_e_vec; 808 CeedRestrictionType rstr_type; 809 810 CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type)); 811 CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED, 812 "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported"); 813 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 814 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 815 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 816 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 817 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 818 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 819 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 820 CeedCall(CeedVectorDestroy(&mult_e_vec)); 821 CeedCall(CeedVectorReciprocal(mult_vec)); 822 } 823 824 // Clone name 825 bool has_name = op_fine->name; 826 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 827 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 828 829 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 830 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 831 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 832 833 // Restriction/Prolongation Operators 834 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 835 836 // Restriction 837 if (op_restrict) { 838 CeedInt *num_comp_r_data; 839 CeedQFunctionContext ctx_r; 840 CeedQFunction qf_restrict; 841 842 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 843 CeedCall(CeedCalloc(1, &num_comp_r_data)); 844 num_comp_r_data[0] = num_comp; 845 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 846 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 847 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 848 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 849 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 850 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 851 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 852 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 853 854 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 855 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 856 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 857 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 858 859 // Set name 860 char *restriction_name; 861 862 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 863 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 864 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 865 CeedCall(CeedFree(&restriction_name)); 866 867 // Check 868 CeedCall(CeedOperatorCheckReady(*op_restrict)); 869 870 // Cleanup 871 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 872 } 873 874 // Prolongation 875 if (op_prolong) { 876 CeedInt *num_comp_p_data; 877 CeedQFunctionContext ctx_p; 878 CeedQFunction qf_prolong; 879 880 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 881 CeedCall(CeedCalloc(1, &num_comp_p_data)); 882 num_comp_p_data[0] = num_comp; 883 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 884 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 885 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 886 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 887 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 888 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 889 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 890 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 891 892 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 893 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 894 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 895 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 896 897 // Set name 898 char *prolongation_name; 899 900 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 901 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 902 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 903 CeedCall(CeedFree(&prolongation_name)); 904 905 // Check 906 CeedCall(CeedOperatorCheckReady(*op_prolong)); 907 908 // Cleanup 909 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 910 } 911 912 // Check 913 CeedCall(CeedOperatorCheckReady(*op_coarse)); 914 915 // Cleanup 916 CeedCall(CeedVectorDestroy(&mult_vec)); 917 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 918 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 919 return CEED_ERROR_SUCCESS; 920 } 921 922 /** 923 @brief Build 1D mass matrix and Laplacian with perturbation 924 925 @param[in] interp_1d Interpolation matrix in one dimension 926 @param[in] grad_1d Gradient matrix in one dimension 927 @param[in] q_weight_1d Quadrature weights in one dimension 928 @param[in] P_1d Number of basis nodes in one dimension 929 @param[in] Q_1d Number of quadrature points in one dimension 930 @param[in] dim Dimension of basis 931 @param[out] mass Assembled mass matrix in one dimension 932 @param[out] laplace Assembled perturbed Laplacian in one dimension 933 934 @return An error code: 0 - success, otherwise - failure 935 936 @ref Developer 937 **/ 938 CeedPragmaOptimizeOff 939 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 940 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 941 for (CeedInt i = 0; i < P_1d; i++) { 942 for (CeedInt j = 0; j < P_1d; j++) { 943 CeedScalar sum = 0.0; 944 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 945 mass[i + j * P_1d] = sum; 946 } 947 } 948 // -- Laplacian 949 for (CeedInt i = 0; i < P_1d; i++) { 950 for (CeedInt j = 0; j < P_1d; j++) { 951 CeedScalar sum = 0.0; 952 953 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 954 laplace[i + j * P_1d] = sum; 955 } 956 } 957 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 958 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 959 return CEED_ERROR_SUCCESS; 960 } 961 CeedPragmaOptimizeOn 962 963 /// @} 964 965 /// ---------------------------------------------------------------------------- 966 /// CeedOperator Backend API 967 /// ---------------------------------------------------------------------------- 968 /// @addtogroup CeedOperatorBackend 969 /// @{ 970 971 /** 972 @brief Create object holding CeedQFunction assembly data for CeedOperator 973 974 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 975 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 976 977 @return An error code: 0 - success, otherwise - failure 978 979 @ref Backend 980 **/ 981 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 982 CeedCall(CeedCalloc(1, data)); 983 (*data)->ref_count = 1; 984 (*data)->ceed = ceed; 985 CeedCall(CeedReference(ceed)); 986 return CEED_ERROR_SUCCESS; 987 } 988 989 /** 990 @brief Increment the reference counter for a CeedQFunctionAssemblyData 991 992 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 993 994 @return An error code: 0 - success, otherwise - failure 995 996 @ref Backend 997 **/ 998 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 999 data->ref_count++; 1000 return CEED_ERROR_SUCCESS; 1001 } 1002 1003 /** 1004 @brief Set re-use of CeedQFunctionAssemblyData 1005 1006 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 1007 @param[in] reuse_data Boolean flag indicating data re-use 1008 1009 @return An error code: 0 - success, otherwise - failure 1010 1011 @ref Backend 1012 **/ 1013 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 1014 data->reuse_data = reuse_data; 1015 data->needs_data_update = true; 1016 return CEED_ERROR_SUCCESS; 1017 } 1018 1019 /** 1020 @brief Mark QFunctionAssemblyData as stale 1021 1022 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 1023 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 1024 1025 @return An error code: 0 - success, otherwise - failure 1026 1027 @ref Backend 1028 **/ 1029 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 1030 data->needs_data_update = needs_data_update; 1031 return CEED_ERROR_SUCCESS; 1032 } 1033 1034 /** 1035 @brief Determine if QFunctionAssemblyData needs update 1036 1037 @param[in] data CeedQFunctionAssemblyData to mark as stale 1038 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1039 1040 @return An error code: 0 - success, otherwise - failure 1041 1042 @ref Backend 1043 **/ 1044 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1045 *is_update_needed = !data->reuse_data || data->needs_data_update; 1046 return CEED_ERROR_SUCCESS; 1047 } 1048 1049 /** 1050 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1051 1052 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1053 1054 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1055 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1056 CeedQFunctionAssemblyData. 1057 1058 @param[in] data CeedQFunctionAssemblyData to copy reference to 1059 @param[in,out] data_copy Variable to store copied reference 1060 1061 @return An error code: 0 - success, otherwise - failure 1062 1063 @ref Backend 1064 **/ 1065 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1066 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1067 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1068 *data_copy = data; 1069 return CEED_ERROR_SUCCESS; 1070 } 1071 1072 /** 1073 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1074 1075 @param[in] data CeedQFunctionAssemblyData to retrieve status 1076 @param[out] is_setup Boolean flag for setup status 1077 1078 @return An error code: 0 - success, otherwise - failure 1079 1080 @ref Backend 1081 **/ 1082 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1083 *is_setup = data->is_setup; 1084 return CEED_ERROR_SUCCESS; 1085 } 1086 1087 /** 1088 @brief Set internal objects for CeedQFunctionAssemblyData 1089 1090 @param[in,out] data CeedQFunctionAssemblyData to set objects 1091 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1092 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1093 1094 @return An error code: 0 - success, otherwise - failure 1095 1096 @ref Backend 1097 **/ 1098 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1099 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1100 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1101 1102 data->is_setup = true; 1103 return CEED_ERROR_SUCCESS; 1104 } 1105 1106 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1107 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1108 1109 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1110 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1111 return CEED_ERROR_SUCCESS; 1112 } 1113 1114 /** 1115 @brief Destroy CeedQFunctionAssemblyData 1116 1117 @param[in,out] data CeedQFunctionAssemblyData to destroy 1118 1119 @return An error code: 0 - success, otherwise - failure 1120 1121 @ref Backend 1122 **/ 1123 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1124 if (!*data || --(*data)->ref_count > 0) { 1125 *data = NULL; 1126 return CEED_ERROR_SUCCESS; 1127 } 1128 CeedCall(CeedDestroy(&(*data)->ceed)); 1129 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1130 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1131 1132 CeedCall(CeedFree(data)); 1133 return CEED_ERROR_SUCCESS; 1134 } 1135 1136 /** 1137 @brief Get CeedOperatorAssemblyData 1138 1139 @param[in] op CeedOperator to assemble 1140 @param[out] data CeedQFunctionAssemblyData 1141 1142 @return An error code: 0 - success, otherwise - failure 1143 1144 @ref Backend 1145 **/ 1146 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1147 if (!op->op_assembled) { 1148 CeedOperatorAssemblyData data; 1149 1150 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1151 op->op_assembled = data; 1152 } 1153 *data = op->op_assembled; 1154 return CEED_ERROR_SUCCESS; 1155 } 1156 1157 /** 1158 @brief Create object holding CeedOperator assembly data. 1159 1160 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1161 An array with references to the corresponding active CeedElemRestrictions is also stored. 1162 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1163 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1164 CeedEvalMode. 1165 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1166 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1167 1168 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1169 @param[in] op CeedOperator to be assembled 1170 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1171 1172 @return An error code: 0 - success, otherwise - failure 1173 1174 @ref Backend 1175 **/ 1176 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1177 CeedInt num_active_bases = 0, num_input_fields, *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0, num_output_fields; 1178 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1179 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1180 CeedQFunctionField *qf_fields; 1181 CeedQFunction qf; 1182 CeedOperatorField *op_fields; 1183 1184 // Allocate 1185 CeedCall(CeedCalloc(1, data)); 1186 (*data)->ceed = ceed; 1187 CeedCall(CeedReference(ceed)); 1188 1189 // Build OperatorAssembly data 1190 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1191 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1192 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1193 1194 // Determine active input basis 1195 for (CeedInt i = 0; i < num_input_fields; i++) { 1196 CeedVector vec; 1197 1198 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1199 if (vec == CEED_VECTOR_ACTIVE) { 1200 CeedInt index = -1, num_comp, q_comp; 1201 CeedEvalMode eval_mode; 1202 CeedBasis basis_in = NULL; 1203 1204 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1205 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1206 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1207 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1208 for (CeedInt i = 0; i < num_active_bases; i++) { 1209 if ((*data)->active_bases[i] == basis_in) index = i; 1210 } 1211 if (index == -1) { 1212 CeedElemRestriction elem_rstr_in; 1213 1214 index = num_active_bases; 1215 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1216 (*data)->active_bases[num_active_bases] = NULL; 1217 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1218 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1219 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1220 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1221 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1222 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1223 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1224 num_eval_modes_in[index] = 0; 1225 num_eval_modes_out[index] = 0; 1226 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1227 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1228 eval_modes_in[index] = NULL; 1229 eval_modes_out[index] = NULL; 1230 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1231 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1232 eval_mode_offsets_in[index] = NULL; 1233 eval_mode_offsets_out[index] = NULL; 1234 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1235 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1236 (*data)->assembled_bases_in[index] = NULL; 1237 (*data)->assembled_bases_out[index] = NULL; 1238 num_active_bases++; 1239 } 1240 if (eval_mode != CEED_EVAL_WEIGHT) { 1241 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1242 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1243 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1244 for (CeedInt d = 0; d < q_comp; d++) { 1245 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1246 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1247 offset += num_comp; 1248 } 1249 num_eval_modes_in[index] += q_comp; 1250 } 1251 } 1252 } 1253 (*data)->num_eval_modes_in = num_eval_modes_in; 1254 (*data)->eval_modes_in = eval_modes_in; 1255 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1256 1257 // Determine active output basis 1258 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1259 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1260 offset = 0; 1261 for (CeedInt i = 0; i < num_output_fields; i++) { 1262 CeedVector vec; 1263 1264 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1265 if (vec == CEED_VECTOR_ACTIVE) { 1266 CeedInt index = -1, num_comp, q_comp; 1267 CeedEvalMode eval_mode; 1268 CeedBasis basis_out = NULL; 1269 1270 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1271 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1272 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1273 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1274 for (CeedInt i = 0; i < num_active_bases; i++) { 1275 if ((*data)->active_bases[i] == basis_out) index = i; 1276 } 1277 if (index == -1) { 1278 CeedElemRestriction elem_rstr_out; 1279 1280 index = num_active_bases; 1281 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1282 (*data)->active_bases[num_active_bases] = NULL; 1283 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1284 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1285 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1286 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1287 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1288 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1289 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1290 num_eval_modes_in[index] = 0; 1291 num_eval_modes_out[index] = 0; 1292 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1293 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1294 eval_modes_in[index] = NULL; 1295 eval_modes_out[index] = NULL; 1296 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1297 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1298 eval_mode_offsets_in[index] = NULL; 1299 eval_mode_offsets_out[index] = NULL; 1300 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1301 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1302 (*data)->assembled_bases_in[index] = NULL; 1303 (*data)->assembled_bases_out[index] = NULL; 1304 num_active_bases++; 1305 } 1306 if (eval_mode != CEED_EVAL_WEIGHT) { 1307 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1308 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1309 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1310 for (CeedInt d = 0; d < q_comp; d++) { 1311 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1312 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1313 offset += num_comp; 1314 } 1315 num_eval_modes_out[index] += q_comp; 1316 } 1317 } 1318 } 1319 (*data)->num_output_components = offset; 1320 (*data)->num_eval_modes_out = num_eval_modes_out; 1321 (*data)->eval_modes_out = eval_modes_out; 1322 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1323 (*data)->num_active_bases = num_active_bases; 1324 return CEED_ERROR_SUCCESS; 1325 } 1326 1327 /** 1328 @brief Get CeedOperator CeedEvalModes for assembly. 1329 1330 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1331 1332 @param[in] data CeedOperatorAssemblyData 1333 @param[out] num_active_bases Total number of active bases 1334 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1335 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1336 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1337 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1338 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1339 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1340 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1341 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1342 including contributions of all active bases 1343 1344 @return An error code: 0 - success, otherwise - failure 1345 1346 1347 @ref Backend 1348 **/ 1349 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1350 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1351 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1352 if (num_active_bases) *num_active_bases = data->num_active_bases; 1353 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1354 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1355 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1356 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1357 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1358 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1359 if (num_output_components) *num_output_components = data->num_output_components; 1360 return CEED_ERROR_SUCCESS; 1361 } 1362 1363 /** 1364 @brief Get CeedOperator CeedBasis data for assembly. 1365 1366 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1367 1368 @param[in] data CeedOperatorAssemblyData 1369 @param[out] num_active_bases Number of active bases, or NULL 1370 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1371 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1372 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1373 1374 @return An error code: 0 - success, otherwise - failure 1375 1376 @ref Backend 1377 **/ 1378 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1379 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1380 // Assemble B_in, B_out if needed 1381 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1382 CeedInt num_qpts; 1383 1384 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1385 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1386 bool has_eval_none = false; 1387 CeedInt num_nodes; 1388 CeedScalar *B_in = NULL, *identity = NULL; 1389 1390 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1391 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1392 1393 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1394 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1395 } 1396 if (has_eval_none) { 1397 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1398 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1399 identity[i * num_nodes + i] = 1.0; 1400 } 1401 } 1402 1403 for (CeedInt q = 0; q < num_qpts; q++) { 1404 for (CeedInt n = 0; n < num_nodes; n++) { 1405 CeedInt d_in = 0, q_comp_in; 1406 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1407 1408 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1409 const CeedInt qq = data->num_eval_modes_in[b] * q; 1410 const CeedScalar *B = NULL; 1411 1412 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1413 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1414 if (q_comp_in > 1) { 1415 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1416 else B = &B[(++d_in) * num_qpts * num_nodes]; 1417 } 1418 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1419 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1420 } 1421 } 1422 } 1423 if (identity) CeedCall(CeedFree(&identity)); 1424 data->assembled_bases_in[b] = B_in; 1425 } 1426 } 1427 1428 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1429 CeedInt num_qpts; 1430 1431 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1432 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1433 bool has_eval_none = false; 1434 CeedInt num_nodes; 1435 CeedScalar *B_out = NULL, *identity = NULL; 1436 1437 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1438 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1439 1440 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1441 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1442 } 1443 if (has_eval_none) { 1444 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1445 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1446 identity[i * num_nodes + i] = 1.0; 1447 } 1448 } 1449 1450 for (CeedInt q = 0; q < num_qpts; q++) { 1451 for (CeedInt n = 0; n < num_nodes; n++) { 1452 CeedInt d_out = 0, q_comp_out; 1453 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1454 1455 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1456 const CeedInt qq = data->num_eval_modes_out[b] * q; 1457 const CeedScalar *B = NULL; 1458 1459 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1460 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1461 if (q_comp_out > 1) { 1462 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1463 else B = &B[(++d_out) * num_qpts * num_nodes]; 1464 } 1465 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1466 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1467 } 1468 } 1469 } 1470 if (identity) CeedCall(CeedFree(&identity)); 1471 data->assembled_bases_out[b] = B_out; 1472 } 1473 } 1474 1475 // Pass out assembled data 1476 if (active_bases) *active_bases = data->active_bases; 1477 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1478 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1479 return CEED_ERROR_SUCCESS; 1480 } 1481 1482 /** 1483 @brief Get CeedOperator CeedBasis data for assembly. 1484 1485 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1486 1487 @param[in] data CeedOperatorAssemblyData 1488 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1489 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1490 1491 @return An error code: 0 - success, otherwise - failure 1492 1493 @ref Backend 1494 **/ 1495 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1496 CeedElemRestriction **active_elem_rstrs) { 1497 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1498 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1499 return CEED_ERROR_SUCCESS; 1500 } 1501 1502 /** 1503 @brief Destroy CeedOperatorAssemblyData 1504 1505 @param[in,out] data CeedOperatorAssemblyData to destroy 1506 1507 @return An error code: 0 - success, otherwise - failure 1508 1509 @ref Backend 1510 **/ 1511 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1512 if (!*data) { 1513 *data = NULL; 1514 return CEED_ERROR_SUCCESS; 1515 } 1516 CeedCall(CeedDestroy(&(*data)->ceed)); 1517 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1518 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1519 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1520 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1521 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1522 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1523 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1524 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1525 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1526 } 1527 CeedCall(CeedFree(&(*data)->active_bases)); 1528 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1529 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1530 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1531 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1532 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1533 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1534 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1535 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1536 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1537 1538 CeedCall(CeedFree(data)); 1539 return CEED_ERROR_SUCCESS; 1540 } 1541 1542 /// @} 1543 1544 /// ---------------------------------------------------------------------------- 1545 /// CeedOperator Public API 1546 /// ---------------------------------------------------------------------------- 1547 /// @addtogroup CeedOperatorUser 1548 /// @{ 1549 1550 /** 1551 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1552 1553 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1554 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1555 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1556 1557 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1558 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1559 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1560 1561 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1562 1563 @param[in] op CeedOperator to assemble CeedQFunction 1564 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1565 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1566 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1567 1568 @return An error code: 0 - success, otherwise - failure 1569 1570 @ref User 1571 **/ 1572 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1573 CeedCall(CeedOperatorCheckReady(op)); 1574 1575 if (op->LinearAssembleQFunction) { 1576 // Backend version 1577 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1578 } else { 1579 // Operator fallback 1580 CeedOperator op_fallback; 1581 1582 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1583 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1584 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1585 } 1586 return CEED_ERROR_SUCCESS; 1587 } 1588 1589 /** 1590 @brief Assemble CeedQFunction and store result internally. 1591 1592 Return copied references of stored data to the caller. 1593 Caller is responsible for ownership and destruction of the copied references. 1594 See also @ref CeedOperatorLinearAssembleQFunction 1595 1596 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1597 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1598 1599 @param[in] op CeedOperator to assemble CeedQFunction 1600 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1601 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1602 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1603 1604 @return An error code: 0 - success, otherwise - failure 1605 1606 @ref User 1607 **/ 1608 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1609 int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL; 1610 CeedOperator op_assemble = NULL; 1611 CeedOperator op_fallback_parent = NULL; 1612 1613 CeedCall(CeedOperatorCheckReady(op)); 1614 1615 // Determine if fallback parent or operator has implementation 1616 CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent)); 1617 if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) { 1618 // -- Backend version for op fallback parent is faster, if it exists 1619 LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate; 1620 op_assemble = op_fallback_parent; 1621 } else if (op->LinearAssembleQFunctionUpdate) { 1622 // -- Backend version for op 1623 LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate; 1624 op_assemble = op; 1625 } 1626 1627 // Assemble QFunction 1628 if (LinearAssembleQFunctionUpdate) { 1629 // Backend or fallback parent version 1630 bool qf_assembled_is_setup; 1631 CeedVector assembled_vec = NULL; 1632 CeedElemRestriction assembled_rstr = NULL; 1633 1634 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1635 if (qf_assembled_is_setup) { 1636 bool update_needed; 1637 1638 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1639 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1640 if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request)); 1641 } else { 1642 CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request)); 1643 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1644 } 1645 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1646 1647 // Copy reference from internally held copy 1648 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1649 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1650 CeedCall(CeedVectorDestroy(&assembled_vec)); 1651 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1652 } else { 1653 // Operator fallback 1654 CeedOperator op_fallback; 1655 1656 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1657 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1658 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1659 } 1660 return CEED_ERROR_SUCCESS; 1661 } 1662 1663 /** 1664 @brief Assemble the diagonal of a square linear CeedOperator 1665 1666 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1667 1668 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1669 1670 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1671 1672 @param[in] op CeedOperator to assemble CeedQFunction 1673 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1674 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1675 1676 @return An error code: 0 - success, otherwise - failure 1677 1678 @ref User 1679 **/ 1680 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1681 bool is_composite; 1682 CeedSize input_size = 0, output_size = 0; 1683 1684 CeedCall(CeedOperatorCheckReady(op)); 1685 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1686 1687 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1688 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1689 1690 // Early exit for empty operator 1691 if (!is_composite) { 1692 CeedInt num_elem = 0; 1693 1694 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1695 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1696 } 1697 1698 if (op->LinearAssembleDiagonal) { 1699 // Backend version 1700 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1701 return CEED_ERROR_SUCCESS; 1702 } else if (op->LinearAssembleAddDiagonal) { 1703 // Backend version with zeroing first 1704 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1705 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1706 return CEED_ERROR_SUCCESS; 1707 } else { 1708 // Operator fallback 1709 CeedOperator op_fallback; 1710 1711 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1712 if (op_fallback) { 1713 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1714 return CEED_ERROR_SUCCESS; 1715 } 1716 } 1717 // Default interface implementation 1718 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1719 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1720 return CEED_ERROR_SUCCESS; 1721 } 1722 1723 /** 1724 @brief Assemble the diagonal of a square linear CeedOperator 1725 1726 This sums into a CeedVector the diagonal of a linear CeedOperator. 1727 1728 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1729 1730 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1731 1732 @param[in] op CeedOperator to assemble CeedQFunction 1733 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1734 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1735 1736 @return An error code: 0 - success, otherwise - failure 1737 1738 @ref User 1739 **/ 1740 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1741 bool is_composite; 1742 CeedSize input_size = 0, output_size = 0; 1743 1744 CeedCall(CeedOperatorCheckReady(op)); 1745 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1746 1747 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1748 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1749 1750 // Early exit for empty operator 1751 if (!is_composite) { 1752 CeedInt num_elem = 0; 1753 1754 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1755 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1756 } 1757 1758 if (op->LinearAssembleAddDiagonal) { 1759 // Backend version 1760 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1761 return CEED_ERROR_SUCCESS; 1762 } else { 1763 // Operator fallback 1764 CeedOperator op_fallback; 1765 1766 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1767 if (op_fallback) { 1768 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1769 return CEED_ERROR_SUCCESS; 1770 } 1771 } 1772 // Default interface implementation 1773 if (is_composite) { 1774 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1775 } else { 1776 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1777 } 1778 return CEED_ERROR_SUCCESS; 1779 } 1780 1781 /** 1782 @brief Assemble the point block diagonal of a square linear CeedOperator 1783 1784 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1785 1786 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1787 1788 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1789 1790 @param[in] op CeedOperator to assemble CeedQFunction 1791 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1792 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1793 component in]. 1794 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1795 1796 @return An error code: 0 - success, otherwise - failure 1797 1798 @ref User 1799 **/ 1800 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1801 bool is_composite; 1802 CeedSize input_size = 0, output_size = 0; 1803 1804 CeedCall(CeedOperatorCheckReady(op)); 1805 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1806 1807 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1808 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1809 1810 // Early exit for empty operator 1811 if (!is_composite) { 1812 CeedInt num_elem = 0; 1813 1814 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1815 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1816 } 1817 1818 if (op->LinearAssemblePointBlockDiagonal) { 1819 // Backend version 1820 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1821 return CEED_ERROR_SUCCESS; 1822 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1823 // Backend version with zeroing first 1824 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1825 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1826 return CEED_ERROR_SUCCESS; 1827 } else { 1828 // Operator fallback 1829 CeedOperator op_fallback; 1830 1831 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1832 if (op_fallback) { 1833 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1834 return CEED_ERROR_SUCCESS; 1835 } 1836 } 1837 // Default interface implementation 1838 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1839 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1840 return CEED_ERROR_SUCCESS; 1841 } 1842 1843 /** 1844 @brief Assemble the point block diagonal of a square linear CeedOperator 1845 1846 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1847 1848 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1849 1850 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1851 1852 @param[in] op CeedOperator to assemble CeedQFunction 1853 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1854 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1855 component in]. 1856 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1857 1858 @return An error code: 0 - success, otherwise - failure 1859 1860 @ref User 1861 **/ 1862 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1863 bool is_composite; 1864 CeedSize input_size = 0, output_size = 0; 1865 1866 CeedCall(CeedOperatorCheckReady(op)); 1867 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1868 1869 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1870 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1871 1872 // Early exit for empty operator 1873 if (!is_composite) { 1874 CeedInt num_elem = 0; 1875 1876 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1877 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1878 } 1879 1880 if (op->LinearAssembleAddPointBlockDiagonal) { 1881 // Backend version 1882 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1883 return CEED_ERROR_SUCCESS; 1884 } else { 1885 // Operator fallback 1886 CeedOperator op_fallback; 1887 1888 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1889 if (op_fallback) { 1890 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1891 return CEED_ERROR_SUCCESS; 1892 } 1893 } 1894 // Default interface implementation 1895 if (is_composite) { 1896 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1897 } else { 1898 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1899 } 1900 return CEED_ERROR_SUCCESS; 1901 } 1902 1903 /** 1904 @brief Fully assemble the nonzero pattern of a linear operator. 1905 1906 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1907 1908 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1909 matrix in entry (i, j). 1910 Note that the (i, j) pairs are not unique and may repeat. 1911 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1912 1913 This will generally be slow unless your operator is low-order. 1914 1915 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1916 1917 @param[in] op CeedOperator to assemble 1918 @param[out] num_entries Number of entries in coordinate nonzero pattern 1919 @param[out] rows Row number for each entry 1920 @param[out] cols Column number for each entry 1921 1922 @ref User 1923 **/ 1924 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1925 bool is_composite; 1926 CeedInt num_suboperators, offset = 0; 1927 CeedSize single_entries; 1928 CeedOperator *sub_operators; 1929 1930 CeedCall(CeedOperatorCheckReady(op)); 1931 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1932 1933 if (op->LinearAssembleSymbolic) { 1934 // Backend version 1935 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1936 return CEED_ERROR_SUCCESS; 1937 } else { 1938 // Operator fallback 1939 CeedOperator op_fallback; 1940 1941 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1942 if (op_fallback) { 1943 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1944 return CEED_ERROR_SUCCESS; 1945 } 1946 } 1947 1948 // Default interface implementation 1949 1950 // count entries and allocate rows, cols arrays 1951 *num_entries = 0; 1952 if (is_composite) { 1953 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1954 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1955 for (CeedInt k = 0; k < num_suboperators; ++k) { 1956 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1957 *num_entries += single_entries; 1958 } 1959 } else { 1960 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1961 *num_entries += single_entries; 1962 } 1963 CeedCall(CeedCalloc(*num_entries, rows)); 1964 CeedCall(CeedCalloc(*num_entries, cols)); 1965 1966 // assemble nonzero locations 1967 if (is_composite) { 1968 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1969 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1970 for (CeedInt k = 0; k < num_suboperators; ++k) { 1971 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1972 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1973 offset += single_entries; 1974 } 1975 } else { 1976 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1977 } 1978 return CEED_ERROR_SUCCESS; 1979 } 1980 1981 /** 1982 @brief Fully assemble the nonzero entries of a linear operator. 1983 1984 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1985 1986 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1987 matrix in entry (i, j). 1988 Note that the (i, j) pairs are not unique and may repeat. 1989 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1990 1991 This will generally be slow unless your operator is low-order. 1992 1993 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1994 1995 @param[in] op CeedOperator to assemble 1996 @param[out] values Values to assemble into matrix 1997 1998 @ref User 1999 **/ 2000 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 2001 bool is_composite; 2002 CeedInt num_suboperators, offset = 0; 2003 CeedSize single_entries = 0; 2004 CeedOperator *sub_operators; 2005 2006 CeedCall(CeedOperatorCheckReady(op)); 2007 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2008 2009 // Early exit for empty operator 2010 if (!is_composite) { 2011 CeedInt num_elem = 0; 2012 2013 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2014 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2015 } 2016 2017 if (op->LinearAssemble) { 2018 // Backend version 2019 CeedCall(op->LinearAssemble(op, values)); 2020 return CEED_ERROR_SUCCESS; 2021 } else { 2022 // Operator fallback 2023 CeedOperator op_fallback; 2024 2025 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2026 if (op_fallback) { 2027 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2028 return CEED_ERROR_SUCCESS; 2029 } 2030 } 2031 2032 // Default interface implementation 2033 CeedCall(CeedVectorSetValue(values, 0.0)); 2034 if (is_composite) { 2035 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2036 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2037 for (CeedInt k = 0; k < num_suboperators; k++) { 2038 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2039 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2040 offset += single_entries; 2041 } 2042 } else { 2043 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2044 } 2045 return CEED_ERROR_SUCCESS; 2046 } 2047 2048 /** 2049 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2050 2051 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2052 2053 @param[in] op Composite CeedOperator 2054 @param[in] num_skip_indices Number of suboperators to skip 2055 @param[in] skip_indices Array of indices of suboperators to skip 2056 @param[out] mult Vector to store multiplicity (of size l_size) 2057 2058 @return An error code: 0 - success, otherwise - failure 2059 2060 @ref User 2061 **/ 2062 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2063 Ceed ceed; 2064 CeedInt num_suboperators; 2065 CeedSize l_vec_len; 2066 CeedScalar *mult_array; 2067 CeedVector ones_l_vec; 2068 CeedElemRestriction elem_rstr, mult_elem_rstr; 2069 CeedOperator *sub_operators; 2070 2071 CeedCall(CeedOperatorCheckReady(op)); 2072 2073 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2074 2075 // Zero mult vector 2076 CeedCall(CeedVectorSetValue(mult, 0.0)); 2077 2078 // Get suboperators 2079 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2080 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2081 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2082 2083 // Work vector 2084 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2085 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2086 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2087 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2088 2089 // Compute multiplicity across suboperators 2090 for (CeedInt i = 0; i < num_suboperators; i++) { 2091 const CeedScalar *sub_mult_array; 2092 CeedVector sub_mult_l_vec, ones_e_vec; 2093 2094 // -- Check for suboperator to skip 2095 for (CeedInt j = 0; j < num_skip_indices; j++) { 2096 if (skip_indices[j] == i) continue; 2097 } 2098 2099 // -- Sub operator multiplicity 2100 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2101 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); 2102 CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2103 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2104 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2105 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2106 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2107 // ---- Flag every node present in the current suboperator 2108 for (CeedInt j = 0; j < l_vec_len; j++) { 2109 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2110 } 2111 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2112 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2113 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2114 CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr)); 2115 } 2116 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2117 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2118 return CEED_ERROR_SUCCESS; 2119 } 2120 2121 /** 2122 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2123 grid interpolation 2124 2125 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2126 2127 @param[in] op_fine Fine grid operator 2128 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2129 @param[in] rstr_coarse Coarse grid restriction 2130 @param[in] basis_coarse Coarse grid active vector basis 2131 @param[out] op_coarse Coarse grid operator 2132 @param[out] op_prolong Coarse to fine operator, or NULL 2133 @param[out] op_restrict Fine to coarse operator, or NULL 2134 2135 @return An error code: 0 - success, otherwise - failure 2136 2137 @ref User 2138 **/ 2139 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2140 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2141 CeedBasis basis_c_to_f = NULL; 2142 2143 CeedCall(CeedOperatorCheckReady(op_fine)); 2144 2145 // Build prolongation matrix, if required 2146 if (op_prolong || op_restrict) { 2147 CeedBasis basis_fine; 2148 2149 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2150 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2151 } 2152 2153 // Core code 2154 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2155 return CEED_ERROR_SUCCESS; 2156 } 2157 2158 /** 2159 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2160 2161 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2162 2163 @param[in] op_fine Fine grid operator 2164 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2165 @param[in] rstr_coarse Coarse grid restriction 2166 @param[in] basis_coarse Coarse grid active vector basis 2167 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2168 @param[out] op_coarse Coarse grid operator 2169 @param[out] op_prolong Coarse to fine operator, or NULL 2170 @param[out] op_restrict Fine to coarse operator, or NULL 2171 2172 @return An error code: 0 - success, otherwise - failure 2173 2174 @ref User 2175 **/ 2176 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2177 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2178 CeedOperator *op_restrict) { 2179 Ceed ceed; 2180 CeedInt Q_f, Q_c; 2181 CeedBasis basis_fine, basis_c_to_f = NULL; 2182 2183 CeedCall(CeedOperatorCheckReady(op_fine)); 2184 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2185 2186 // Check for compatible quadrature spaces 2187 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2188 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2189 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2190 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2191 2192 // Create coarse to fine basis, if required 2193 if (op_prolong || op_restrict) { 2194 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2195 CeedScalar *q_ref, *q_weight, *grad; 2196 2197 // Check if interpolation matrix is provided 2198 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2199 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2200 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2201 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2202 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2203 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2204 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2205 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2206 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2207 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2208 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2209 CeedCall(CeedFree(&q_ref)); 2210 CeedCall(CeedFree(&q_weight)); 2211 CeedCall(CeedFree(&grad)); 2212 } 2213 2214 // Core code 2215 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2216 return CEED_ERROR_SUCCESS; 2217 } 2218 2219 /** 2220 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2221 2222 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2223 2224 @param[in] op_fine Fine grid operator 2225 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2226 @param[in] rstr_coarse Coarse grid restriction 2227 @param[in] basis_coarse Coarse grid active vector basis 2228 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2229 @param[out] op_coarse Coarse grid operator 2230 @param[out] op_prolong Coarse to fine operator, or NULL 2231 @param[out] op_restrict Fine to coarse operator, or NULL 2232 2233 @return An error code: 0 - success, otherwise - failure 2234 2235 @ref User 2236 **/ 2237 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2238 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2239 CeedOperator *op_restrict) { 2240 Ceed ceed; 2241 CeedInt Q_f, Q_c; 2242 CeedBasis basis_fine, basis_c_to_f = NULL; 2243 2244 CeedCall(CeedOperatorCheckReady(op_fine)); 2245 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2246 2247 // Check for compatible quadrature spaces 2248 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2249 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2250 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2251 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2252 2253 // Coarse to fine basis 2254 if (op_prolong || op_restrict) { 2255 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2256 CeedScalar *q_ref, *q_weight, *grad; 2257 CeedElemTopology topo; 2258 2259 // Check if interpolation matrix is provided 2260 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2261 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2262 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2263 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2264 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2265 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2266 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2267 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2268 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2269 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2270 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2271 CeedCall(CeedFree(&q_ref)); 2272 CeedCall(CeedFree(&q_weight)); 2273 CeedCall(CeedFree(&grad)); 2274 } 2275 2276 // Core code 2277 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2278 return CEED_ERROR_SUCCESS; 2279 } 2280 2281 /** 2282 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2283 2284 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2285 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2286 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2287 \hat S V\f$. 2288 The CeedOperator must be linear and non-composite. 2289 The associated CeedQFunction must therefore also be linear. 2290 2291 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2292 2293 @param[in] op CeedOperator to create element inverses 2294 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2295 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2296 2297 @return An error code: 0 - success, otherwise - failure 2298 2299 @ref User 2300 **/ 2301 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2302 Ceed ceed, ceed_parent; 2303 bool interp = false, grad = false, is_tensor_basis = true; 2304 CeedInt num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2305 CeedSize l_size = 1; 2306 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg; 2307 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2308 CeedVector q_data; 2309 CeedElemRestriction rstr = NULL, rstr_qd_i; 2310 CeedBasis basis = NULL, fdm_basis; 2311 CeedQFunctionContext ctx_fdm; 2312 CeedQFunctionField *qf_fields; 2313 CeedQFunction qf, qf_fdm; 2314 CeedOperatorField *op_fields; 2315 2316 CeedCall(CeedOperatorCheckReady(op)); 2317 2318 if (op->CreateFDMElementInverse) { 2319 // Backend version 2320 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2321 return CEED_ERROR_SUCCESS; 2322 } else { 2323 // Operator fallback 2324 CeedOperator op_fallback; 2325 2326 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2327 if (op_fallback) { 2328 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2329 return CEED_ERROR_SUCCESS; 2330 } 2331 } 2332 2333 // Default interface implementation 2334 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2335 CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent)); 2336 ceed_parent = ceed_parent ? ceed_parent : ceed; 2337 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2338 2339 // Determine active input basis 2340 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2341 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2342 for (CeedInt i = 0; i < num_input_fields; i++) { 2343 CeedVector vec; 2344 2345 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2346 if (vec == CEED_VECTOR_ACTIVE) { 2347 CeedEvalMode eval_mode; 2348 2349 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2350 interp = interp || eval_mode == CEED_EVAL_INTERP; 2351 grad = grad || eval_mode == CEED_EVAL_GRAD; 2352 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2353 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2354 } 2355 } 2356 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2357 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2358 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2359 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2360 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2361 CeedCall(CeedBasisGetDimension(basis, &dim)); 2362 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2363 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2364 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2365 2366 // Build and diagonalize 1D Mass and Laplacian 2367 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2368 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2369 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2370 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2371 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2372 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2373 CeedCall(CeedCalloc(P_1d, &lambda)); 2374 // -- Build matrices 2375 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2376 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2377 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2378 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2379 2380 // -- Diagonalize 2381 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2382 CeedCall(CeedFree(&mass)); 2383 CeedCall(CeedFree(&laplace)); 2384 for (CeedInt i = 0; i < P_1d; i++) { 2385 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2386 } 2387 CeedCall(CeedFree(&x)); 2388 2389 { 2390 CeedInt layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2391 CeedScalar max_norm = 0; 2392 const CeedScalar *assembled_array, *q_weight_array; 2393 CeedVector assembled = NULL, q_weight; 2394 CeedElemRestriction rstr_qf = NULL; 2395 2396 // Assemble QFunction 2397 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2398 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2399 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2400 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2401 2402 // Calculate element averages 2403 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2404 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2405 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2406 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2407 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2408 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2409 2410 for (CeedInt e = 0; e < num_elem; e++) { 2411 CeedInt count = 0; 2412 2413 for (CeedInt q = 0; q < num_qpts; q++) { 2414 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2415 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2416 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2417 count++; 2418 } 2419 } 2420 } 2421 if (count) { 2422 elem_avg[e] /= count; 2423 } else { 2424 elem_avg[e] = 1.0; 2425 } 2426 } 2427 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2428 CeedCall(CeedVectorDestroy(&assembled)); 2429 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2430 CeedCall(CeedVectorDestroy(&q_weight)); 2431 } 2432 2433 // Build FDM diagonal 2434 { 2435 CeedScalar *q_data_array, *fdm_diagonal; 2436 2437 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2438 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2439 for (CeedInt c = 0; c < num_comp; c++) { 2440 for (CeedInt n = 0; n < num_nodes; n++) { 2441 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2442 if (grad) { 2443 for (CeedInt d = 0; d < dim; d++) { 2444 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2445 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2446 } 2447 } 2448 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2449 } 2450 } 2451 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2452 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2453 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2454 for (CeedInt e = 0; e < num_elem; e++) { 2455 for (CeedInt c = 0; c < num_comp; c++) { 2456 for (CeedInt n = 0; n < num_nodes; n++) 2457 q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2458 } 2459 } 2460 CeedCall(CeedFree(&elem_avg)); 2461 CeedCall(CeedFree(&fdm_diagonal)); 2462 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2463 } 2464 2465 // Setup FDM operator 2466 // -- Basis 2467 { 2468 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2469 2470 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2471 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2472 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2473 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2474 CeedCall(CeedFree(&fdm_interp)); 2475 CeedCall(CeedFree(&grad_dummy)); 2476 CeedCall(CeedFree(&q_ref_dummy)); 2477 CeedCall(CeedFree(&q_weight_dummy)); 2478 CeedCall(CeedFree(&lambda)); 2479 } 2480 2481 // -- Restriction 2482 { 2483 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2484 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2485 } 2486 2487 // -- QFunction 2488 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2489 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2490 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2491 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2492 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2493 2494 // -- QFunction context 2495 { 2496 CeedInt *num_comp_data; 2497 2498 CeedCall(CeedCalloc(1, &num_comp_data)); 2499 num_comp_data[0] = num_comp; 2500 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2501 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2502 } 2503 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2504 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2505 2506 // -- Operator 2507 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2508 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2509 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2510 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2511 2512 // Cleanup 2513 CeedCall(CeedVectorDestroy(&q_data)); 2514 CeedCall(CeedBasisDestroy(&fdm_basis)); 2515 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2516 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2517 return CEED_ERROR_SUCCESS; 2518 } 2519 2520 /// @} 2521