1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = NULL; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 128 // Cleanup 129 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 130 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 131 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 132 } 133 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 134 CeedCall(CeedOperatorCheckReady(op_fallback)); 135 // Note: No ref-counting here so we don't get caught in a reference loop. 136 // The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback. 137 op->op_fallback = op_fallback; 138 op_fallback->op_fallback_parent = op; 139 140 return CEED_ERROR_SUCCESS; 141 } 142 143 /** 144 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 145 146 @param[in] op CeedOperator to retrieve fallback for 147 @param[out] op_fallback Fallback CeedOperator 148 149 @return An error code: 0 - success, otherwise - failure 150 151 @ref Developer 152 **/ 153 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 154 // Create if needed 155 if (!op->op_fallback) { 156 CeedCall(CeedOperatorCreateFallback(op)); 157 } 158 if (op->op_fallback) { 159 bool is_debug; 160 161 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 162 if (is_debug) { 163 Ceed ceed, ceed_fallback; 164 const char *resource, *resource_fallback; 165 166 CeedCall(CeedOperatorGetCeed(op, &ceed)); 167 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 168 CeedCall(CeedGetResource(ceed, &resource)); 169 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 170 171 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 172 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 173 op->op_fallback); 174 } 175 } 176 *op_fallback = op->op_fallback; 177 178 return CEED_ERROR_SUCCESS; 179 } 180 181 /** 182 @brief Select correct basis matrix pointer based on CeedEvalMode 183 184 @param[in] basis CeedBasis from which to get the basis matrix 185 @param[in] eval_mode Current basis evaluation mode 186 @param[in] identity Pointer to identity matrix 187 @param[out] basis_ptr Basis pointer to set 188 189 @ref Developer 190 **/ 191 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 192 switch (eval_mode) { 193 case CEED_EVAL_NONE: 194 *basis_ptr = identity; 195 break; 196 case CEED_EVAL_INTERP: 197 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 198 break; 199 case CEED_EVAL_GRAD: 200 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 201 break; 202 case CEED_EVAL_DIV: 203 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 204 break; 205 case CEED_EVAL_CURL: 206 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 207 break; 208 case CEED_EVAL_WEIGHT: 209 break; // Caught by QF Assembly 210 } 211 assert(*basis_ptr != NULL); 212 213 return CEED_ERROR_SUCCESS; 214 } 215 216 /** 217 @brief Create point block restriction for active operator field 218 219 @param[in] rstr Original CeedElemRestriction for active field 220 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 221 222 @return An error code: 0 - success, otherwise - failure 223 224 @ref Developer 225 **/ 226 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 227 Ceed ceed; 228 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 229 const CeedInt *offsets; 230 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 231 232 // Expand offsets 233 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 234 CeedSize l_size; 235 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 236 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 237 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 238 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 239 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 240 CeedInt shift = num_comp; 241 if (comp_stride != 1) shift *= num_comp; 242 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 243 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 244 pointblock_offsets[i] = offsets[i] * shift; 245 } 246 247 // Create new restriction 248 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 249 pointblock_offsets, pointblock_rstr)); 250 251 // Cleanup 252 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 253 254 return CEED_ERROR_SUCCESS; 255 } 256 257 /** 258 @brief Core logic for assembling operator diagonal or point block diagonal 259 260 @param[in] op CeedOperator to assemble point block diagonal 261 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 262 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 263 @param[out] assembled CeedVector to store assembled diagonal 264 265 @return An error code: 0 - success, otherwise - failure 266 267 @ref Developer 268 **/ 269 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 270 Ceed ceed; 271 CeedCall(CeedOperatorGetCeed(op, &ceed)); 272 273 // Assemble QFunction 274 CeedQFunction qf; 275 const CeedScalar *assembled_qf_array; 276 CeedVector assembled_qf = NULL; 277 CeedElemRestriction assembled_elem_rstr = NULL; 278 CeedInt num_input_fields, num_output_fields; 279 CeedInt layout[3]; 280 281 CeedCall(CeedOperatorGetQFunction(op, &qf)); 282 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 283 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 284 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 285 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 286 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 287 288 // Get assembly data 289 CeedOperatorAssemblyData data; 290 const CeedEvalMode **eval_modes_in, **eval_modes_out; 291 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 292 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 293 CeedBasis *active_bases; 294 CeedElemRestriction *active_elem_rstrs; 295 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 296 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 297 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 298 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 299 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 300 301 // Loop over all active bases 302 for (CeedInt b = 0; b < num_active_bases; b++) { 303 // Assemble point block diagonal restriction, if needed 304 CeedElemRestriction diag_elem_rstr; 305 if (is_pointblock) { 306 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs[b], &diag_elem_rstr)); 307 } else { 308 CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs[b], &diag_elem_rstr)); 309 } 310 311 // Create diagonal vector 312 CeedVector elem_diag; 313 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 314 315 // Assemble element operator diagonals 316 CeedScalar *elem_diag_array; 317 CeedInt num_elem, num_nodes, num_qpts, num_components; 318 319 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 320 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 321 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 322 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 323 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 324 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 325 326 // Construct identity matrix for basis if required 327 bool has_eval_none = false; 328 CeedScalar *identity = NULL; 329 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 330 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 331 } 332 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 333 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 334 } 335 if (has_eval_none) { 336 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 337 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 338 } 339 340 // Compute the diagonal of B^T D B 341 // Each element 342 for (CeedSize e = 0; e < num_elem; e++) { 343 // Each basis eval mode pair 344 CeedInt d_out = 0, q_comp_out; 345 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 346 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 347 const CeedScalar *B_t = NULL; 348 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 349 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 350 if (q_comp_out > 1) { 351 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 352 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 353 } 354 eval_mode_out_prev = eval_modes_out[b][e_out]; 355 356 CeedInt d_in = 0, q_comp_in; 357 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 358 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 359 const CeedScalar *B = NULL; 360 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 361 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 362 if (q_comp_in > 1) { 363 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 364 else B = &B[(++d_in) * num_qpts * num_nodes]; 365 } 366 eval_mode_in_prev = eval_modes_in[b][e_in]; 367 368 // Each component 369 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 370 // Each qpt/node pair 371 for (CeedInt q = 0; q < num_qpts; q++) { 372 if (is_pointblock) { 373 // Point Block Diagonal 374 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 375 const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 376 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 377 for (CeedInt n = 0; n < num_nodes; n++) { 378 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 379 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 380 } 381 } 382 } else { 383 // Diagonal Only 384 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 385 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 386 for (CeedInt n = 0; n < num_nodes; n++) { 387 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 388 } 389 } 390 } 391 } 392 } 393 } 394 } 395 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 396 397 // Assemble local operator diagonal 398 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 399 400 // Cleanup 401 CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 402 CeedCall(CeedVectorDestroy(&elem_diag)); 403 CeedCall(CeedFree(&identity)); 404 } 405 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 406 CeedCall(CeedVectorDestroy(&assembled_qf)); 407 408 return CEED_ERROR_SUCCESS; 409 } 410 411 /** 412 @brief Core logic for assembling composite operator diagonal 413 414 @param[in] op CeedOperator to assemble point block diagonal 415 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 416 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 417 @param[out] assembled CeedVector to store assembled diagonal 418 419 @return An error code: 0 - success, otherwise - failure 420 421 @ref Developer 422 **/ 423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 424 CeedVector assembled) { 425 CeedInt num_sub; 426 CeedOperator *suboperators; 427 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 428 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 429 for (CeedInt i = 0; i < num_sub; i++) { 430 if (is_pointblock) { 431 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 432 } else { 433 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 434 } 435 } 436 return CEED_ERROR_SUCCESS; 437 } 438 439 /** 440 @brief Build nonzero pattern for non-composite operator 441 442 Users should generally use CeedOperatorLinearAssembleSymbolic() 443 444 @param[in] op CeedOperator to assemble nonzero pattern 445 @param[in] offset Offset for number of entries 446 @param[out] rows Row number for each entry 447 @param[out] cols Column number for each entry 448 449 @return An error code: 0 - success, otherwise - failure 450 451 @ref Developer 452 **/ 453 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 454 Ceed ceed; 455 bool is_composite; 456 CeedCall(CeedOperatorGetCeed(op, &ceed)); 457 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 458 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 459 460 CeedSize num_nodes; 461 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 462 CeedElemRestriction active_rstr; 463 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 464 CeedElemRestriction index_elem_rstr; 465 CeedCall(CeedElemRestrictionCreateUnorientedCopy(active_rstr, &index_elem_rstr)); 466 CeedInt num_elem, elem_size, num_comp; 467 CeedCall(CeedElemRestrictionGetNumElements(index_elem_rstr, &num_elem)); 468 CeedCall(CeedElemRestrictionGetElementSize(index_elem_rstr, &elem_size)); 469 CeedCall(CeedElemRestrictionGetNumComponents(index_elem_rstr, &num_comp)); 470 CeedInt layout_er[3]; 471 CeedCall(CeedElemRestrictionGetELayout(index_elem_rstr, &layout_er)); 472 473 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 474 475 // Determine elem_dof relation 476 CeedVector index_vec; 477 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 478 CeedScalar *array; 479 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 480 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 481 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 482 CeedVector elem_dof; 483 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 484 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 485 CeedCall(CeedElemRestrictionApply(index_elem_rstr, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 486 const CeedScalar *elem_dof_a; 487 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 488 CeedCall(CeedVectorDestroy(&index_vec)); 489 490 // Determine i, j locations for element matrices 491 CeedSize count = 0; 492 for (CeedInt e = 0; e < num_elem; e++) { 493 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 494 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 495 for (CeedInt i = 0; i < elem_size; i++) { 496 for (CeedInt j = 0; j < elem_size; j++) { 497 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 498 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 499 500 const CeedInt row = elem_dof_a[elem_dof_index_row]; 501 const CeedInt col = elem_dof_a[elem_dof_index_col]; 502 503 rows[offset + count] = row; 504 cols[offset + count] = col; 505 count++; 506 } 507 } 508 } 509 } 510 } 511 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 512 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 513 CeedCall(CeedVectorDestroy(&elem_dof)); 514 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr)); 515 516 return CEED_ERROR_SUCCESS; 517 } 518 519 /** 520 @brief Assemble nonzero entries for non-composite operator 521 522 Users should generally use CeedOperatorLinearAssemble() 523 524 @param[in] op CeedOperator to assemble 525 @param[in] offset Offset for number of entries 526 @param[out] values Values to assemble into matrix 527 528 @return An error code: 0 - success, otherwise - failure 529 530 @ref Developer 531 **/ 532 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 533 Ceed ceed; 534 bool is_composite; 535 CeedCall(CeedOperatorGetCeed(op, &ceed)); 536 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 537 538 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 539 540 // Early exit for empty operator 541 { 542 CeedInt num_elem = 0; 543 544 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 545 if (num_elem == 0) return CEED_ERROR_SUCCESS; 546 } 547 548 if (op->LinearAssembleSingle) { 549 // Backend version 550 CeedCall(op->LinearAssembleSingle(op, offset, values)); 551 return CEED_ERROR_SUCCESS; 552 } else { 553 // Operator fallback 554 CeedOperator op_fallback; 555 556 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 557 if (op_fallback) { 558 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 559 return CEED_ERROR_SUCCESS; 560 } 561 } 562 563 // Assemble QFunction 564 CeedQFunction qf; 565 CeedCall(CeedOperatorGetQFunction(op, &qf)); 566 CeedVector assembled_qf = NULL; 567 CeedElemRestriction rstr_q = NULL; 568 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 569 570 CeedInt num_input_fields, num_output_fields; 571 CeedOperatorField *input_fields; 572 CeedOperatorField *output_fields; 573 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 574 575 // Get assembly data 576 CeedOperatorAssemblyData data; 577 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 578 const CeedEvalMode **eval_modes_in, **eval_modes_out; 579 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 580 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 581 &eval_modes_out, NULL, NULL)); 582 CeedBasis *bases; 583 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 584 CeedBasis basis_in = bases[0]; 585 586 CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 587 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 588 589 CeedElemRestriction active_rstr; 590 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 591 CeedInt num_elem, elem_size, num_qpts, num_comp; 592 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 593 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 594 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 595 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 596 597 CeedRestrictionType rstr_type; 598 CeedCall(CeedElemRestrictionGetType(active_rstr, &rstr_type)); 599 const bool *orients = NULL; 600 const CeedInt8 *curl_orients = NULL; 601 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 602 CeedCall(CeedElemRestrictionGetOrientations(active_rstr, CEED_MEM_HOST, &orients)); 603 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 604 CeedCall(CeedElemRestrictionGetCurlOrientations(active_rstr, CEED_MEM_HOST, &curl_orients)); 605 } 606 607 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 608 609 // Loop over elements and put in data structure 610 const CeedScalar *assembled_qf_array; 611 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 612 613 CeedInt layout_qf[3]; 614 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 615 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 616 617 // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 618 const CeedScalar **B_mats_in, **B_mats_out; 619 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 620 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 621 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 622 CeedScalar elem_mat[elem_size * elem_size]; 623 CeedSize count = 0; 624 CeedScalar *vals; 625 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 626 for (CeedSize e = 0; e < num_elem; e++) { 627 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 628 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 629 // Compute B^T*D 630 for (CeedSize n = 0; n < elem_size; n++) { 631 for (CeedSize q = 0; q < num_qpts; q++) { 632 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 633 const CeedSize btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 634 CeedScalar sum = 0.0; 635 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 636 const CeedSize b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 637 const CeedSize eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 638 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 639 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 640 } 641 BTD_mat[btd_index] = sum; 642 } 643 } 644 } 645 646 // Form element matrix itself (for each block component) 647 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 648 649 // Transform the element matrix if required 650 if (orients) { 651 const bool *elem_orients = &orients[e * elem_size]; 652 for (CeedInt i = 0; i < elem_size; i++) { 653 for (CeedInt j = 0; j < elem_size; j++) { 654 elem_mat[i * elem_size + j] *= elem_orients[i] ? -1.0 : 1.0; 655 elem_mat[i * elem_size + j] *= elem_orients[j] ? -1.0 : 1.0; 656 } 657 } 658 } else if (curl_orients) { 659 const CeedInt8 *elem_curl_orients = &curl_orients[e * 3 * elem_size]; 660 CeedScalar o_elem_mat[elem_size * elem_size]; 661 // T^T*(B^T*D*B) 662 for (CeedInt i = 0; i < elem_size; i++) { 663 for (CeedInt j = 0; j < elem_size; j++) { 664 o_elem_mat[i * elem_size + j] = elem_mat[i * elem_size + j] * elem_curl_orients[3 * i + 1] + 665 (i > 0 ? elem_mat[(i - 1) * elem_size + j] * elem_curl_orients[3 * i - 1] : 0.0) + 666 (i < elem_size - 1 ? elem_mat[(i + 1) * elem_size + j] * elem_curl_orients[3 * i + 3] : 0.0); 667 } 668 } 669 // T^T*(B^T*D*B)*T 670 for (CeedInt i = 0; i < elem_size; i++) { 671 for (CeedInt j = 0; j < elem_size; j++) { 672 elem_mat[i * elem_size + j] = o_elem_mat[i * elem_size + j] * elem_curl_orients[3 * j + 1] + 673 (j > 0 ? o_elem_mat[i * elem_size + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) + 674 (j < elem_size - 1 ? o_elem_mat[i * elem_size + j + 1] * elem_curl_orients[3 * j + 3] : 0.0); 675 } 676 } 677 } 678 679 // Put element matrix in coordinate data structure 680 for (CeedInt i = 0; i < elem_size; i++) { 681 for (CeedInt j = 0; j < elem_size; j++) { 682 vals[offset + count] = elem_mat[i * elem_size + j]; 683 count++; 684 } 685 } 686 } 687 } 688 } 689 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 690 CeedCall(CeedVectorRestoreArray(values, &vals)); 691 692 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 693 CeedCall(CeedVectorDestroy(&assembled_qf)); 694 695 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 696 CeedCall(CeedElemRestrictionRestoreOrientations(active_rstr, &orients)); 697 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 698 CeedCall(CeedElemRestrictionRestoreCurlOrientations(active_rstr, &curl_orients)); 699 } 700 701 return CEED_ERROR_SUCCESS; 702 } 703 704 /** 705 @brief Count number of entries for assembled CeedOperator 706 707 @param[in] op CeedOperator to assemble 708 @param[out] num_entries Number of entries in assembled representation 709 710 @return An error code: 0 - success, otherwise - failure 711 712 @ref Utility 713 **/ 714 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 715 bool is_composite; 716 CeedElemRestriction rstr; 717 CeedInt num_elem, elem_size, num_comp; 718 719 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 720 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 721 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 722 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 723 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 724 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 725 *num_entries = (CeedSize)elem_size * num_comp * elem_size * num_comp * num_elem; 726 727 return CEED_ERROR_SUCCESS; 728 } 729 730 /** 731 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 732 733 @param[in] op_fine Fine grid operator 734 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 735 @param[in] rstr_coarse Coarse grid restriction 736 @param[in] basis_coarse Coarse grid active vector basis 737 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 738 @param[out] op_coarse Coarse grid operator 739 @param[out] op_prolong Coarse to fine operator, or NULL 740 @param[out] op_restrict Fine to coarse operator, or NULL 741 742 @return An error code: 0 - success, otherwise - failure 743 744 @ref Developer 745 **/ 746 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 747 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 748 Ceed ceed; 749 CeedVector mult_vec = NULL; 750 CeedElemRestriction rstr_p_mult_fine = NULL; 751 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 752 753 // Check for composite operator 754 bool is_composite; 755 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 756 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 757 758 // Coarse Grid 759 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 760 CeedElemRestriction rstr_fine = NULL; 761 // -- Clone input fields 762 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 763 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 764 rstr_fine = op_fine->input_fields[i]->elem_rstr; 765 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 766 } else { 767 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 768 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 769 } 770 } 771 // -- Clone output fields 772 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 773 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 774 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 775 } else { 776 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 777 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 778 } 779 } 780 // -- Clone QFunctionAssemblyData 781 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 782 783 // Multiplicity vector 784 if (op_restrict || op_prolong) { 785 CeedRestrictionType rstr_type; 786 CeedVector mult_e_vec; 787 788 CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type)); 789 CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED, 790 "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported"); 791 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 792 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 793 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 794 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 795 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 796 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 797 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 798 CeedCall(CeedVectorDestroy(&mult_e_vec)); 799 CeedCall(CeedVectorReciprocal(mult_vec)); 800 } 801 802 // Clone name 803 bool has_name = op_fine->name; 804 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 805 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 806 807 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 808 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 809 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 810 811 // Restriction/Prolongation Operators 812 CeedInt num_comp; 813 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 814 815 // Restriction 816 if (op_restrict) { 817 CeedInt *num_comp_r_data; 818 CeedQFunction qf_restrict; 819 CeedQFunctionContext ctx_r; 820 821 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 822 CeedCall(CeedCalloc(1, &num_comp_r_data)); 823 num_comp_r_data[0] = num_comp; 824 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 825 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 826 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 827 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 828 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 829 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 830 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 831 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 832 833 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 834 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 835 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 836 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 837 838 // Set name 839 char *restriction_name; 840 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 841 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 842 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 843 CeedCall(CeedFree(&restriction_name)); 844 845 // Check 846 CeedCall(CeedOperatorCheckReady(*op_restrict)); 847 848 // Cleanup 849 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 850 } 851 852 // Prolongation 853 if (op_prolong) { 854 CeedInt *num_comp_p_data; 855 CeedQFunction qf_prolong; 856 CeedQFunctionContext ctx_p; 857 858 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 859 CeedCall(CeedCalloc(1, &num_comp_p_data)); 860 num_comp_p_data[0] = num_comp; 861 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 862 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 863 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 864 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 865 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 866 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 867 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 868 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 869 870 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 871 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 872 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 873 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 874 875 // Set name 876 char *prolongation_name; 877 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 878 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 879 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 880 CeedCall(CeedFree(&prolongation_name)); 881 882 // Check 883 CeedCall(CeedOperatorCheckReady(*op_prolong)); 884 885 // Cleanup 886 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 887 } 888 889 // Check 890 CeedCall(CeedOperatorCheckReady(*op_coarse)); 891 892 // Cleanup 893 CeedCall(CeedVectorDestroy(&mult_vec)); 894 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 895 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 896 897 return CEED_ERROR_SUCCESS; 898 } 899 900 /** 901 @brief Build 1D mass matrix and Laplacian with perturbation 902 903 @param[in] interp_1d Interpolation matrix in one dimension 904 @param[in] grad_1d Gradient matrix in one dimension 905 @param[in] q_weight_1d Quadrature weights in one dimension 906 @param[in] P_1d Number of basis nodes in one dimension 907 @param[in] Q_1d Number of quadrature points in one dimension 908 @param[in] dim Dimension of basis 909 @param[out] mass Assembled mass matrix in one dimension 910 @param[out] laplace Assembled perturbed Laplacian in one dimension 911 912 @return An error code: 0 - success, otherwise - failure 913 914 @ref Developer 915 **/ 916 CeedPragmaOptimizeOff 917 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 918 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 919 for (CeedInt i = 0; i < P_1d; i++) { 920 for (CeedInt j = 0; j < P_1d; j++) { 921 CeedScalar sum = 0.0; 922 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 923 mass[i + j * P_1d] = sum; 924 } 925 } 926 // -- Laplacian 927 for (CeedInt i = 0; i < P_1d; i++) { 928 for (CeedInt j = 0; j < P_1d; j++) { 929 CeedScalar sum = 0.0; 930 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 931 laplace[i + j * P_1d] = sum; 932 } 933 } 934 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 935 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 936 return CEED_ERROR_SUCCESS; 937 } 938 CeedPragmaOptimizeOn 939 940 /// @} 941 942 /// ---------------------------------------------------------------------------- 943 /// CeedOperator Backend API 944 /// ---------------------------------------------------------------------------- 945 /// @addtogroup CeedOperatorBackend 946 /// @{ 947 948 /** 949 @brief Create object holding CeedQFunction assembly data for CeedOperator 950 951 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 952 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 953 954 @return An error code: 0 - success, otherwise - failure 955 956 @ref Backend 957 **/ 958 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 959 CeedCall(CeedCalloc(1, data)); 960 (*data)->ref_count = 1; 961 (*data)->ceed = ceed; 962 CeedCall(CeedReference(ceed)); 963 964 return CEED_ERROR_SUCCESS; 965 } 966 967 /** 968 @brief Increment the reference counter for a CeedQFunctionAssemblyData 969 970 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 971 972 @return An error code: 0 - success, otherwise - failure 973 974 @ref Backend 975 **/ 976 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 977 data->ref_count++; 978 return CEED_ERROR_SUCCESS; 979 } 980 981 /** 982 @brief Set re-use of CeedQFunctionAssemblyData 983 984 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 985 @param[in] reuse_data Boolean flag indicating data re-use 986 987 @return An error code: 0 - success, otherwise - failure 988 989 @ref Backend 990 **/ 991 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 992 data->reuse_data = reuse_data; 993 data->needs_data_update = true; 994 return CEED_ERROR_SUCCESS; 995 } 996 997 /** 998 @brief Mark QFunctionAssemblyData as stale 999 1000 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 1001 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 1002 1003 @return An error code: 0 - success, otherwise - failure 1004 1005 @ref Backend 1006 **/ 1007 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 1008 data->needs_data_update = needs_data_update; 1009 return CEED_ERROR_SUCCESS; 1010 } 1011 1012 /** 1013 @brief Determine if QFunctionAssemblyData needs update 1014 1015 @param[in] data CeedQFunctionAssemblyData to mark as stale 1016 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1017 1018 @return An error code: 0 - success, otherwise - failure 1019 1020 @ref Backend 1021 **/ 1022 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1023 *is_update_needed = !data->reuse_data || data->needs_data_update; 1024 return CEED_ERROR_SUCCESS; 1025 } 1026 1027 /** 1028 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1029 1030 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1031 1032 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1033 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1034 CeedQFunctionAssemblyData. 1035 1036 @param[in] data CeedQFunctionAssemblyData to copy reference to 1037 @param[in,out] data_copy Variable to store copied reference 1038 1039 @return An error code: 0 - success, otherwise - failure 1040 1041 @ref Backend 1042 **/ 1043 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1044 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1045 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1046 *data_copy = data; 1047 return CEED_ERROR_SUCCESS; 1048 } 1049 1050 /** 1051 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1052 1053 @param[in] data CeedQFunctionAssemblyData to retrieve status 1054 @param[out] is_setup Boolean flag for setup status 1055 1056 @return An error code: 0 - success, otherwise - failure 1057 1058 @ref Backend 1059 **/ 1060 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1061 *is_setup = data->is_setup; 1062 return CEED_ERROR_SUCCESS; 1063 } 1064 1065 /** 1066 @brief Set internal objects for CeedQFunctionAssemblyData 1067 1068 @param[in,out] data CeedQFunctionAssemblyData to set objects 1069 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1070 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1071 1072 @return An error code: 0 - success, otherwise - failure 1073 1074 @ref Backend 1075 **/ 1076 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1077 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1078 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1079 1080 data->is_setup = true; 1081 return CEED_ERROR_SUCCESS; 1082 } 1083 1084 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1085 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1086 1087 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1088 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1089 1090 return CEED_ERROR_SUCCESS; 1091 } 1092 1093 /** 1094 @brief Destroy CeedQFunctionAssemblyData 1095 1096 @param[in,out] data CeedQFunctionAssemblyData to destroy 1097 1098 @return An error code: 0 - success, otherwise - failure 1099 1100 @ref Backend 1101 **/ 1102 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1103 if (!*data || --(*data)->ref_count > 0) { 1104 *data = NULL; 1105 return CEED_ERROR_SUCCESS; 1106 } 1107 CeedCall(CeedDestroy(&(*data)->ceed)); 1108 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1109 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1110 1111 CeedCall(CeedFree(data)); 1112 return CEED_ERROR_SUCCESS; 1113 } 1114 1115 /** 1116 @brief Get CeedOperatorAssemblyData 1117 1118 @param[in] op CeedOperator to assemble 1119 @param[out] data CeedQFunctionAssemblyData 1120 1121 @return An error code: 0 - success, otherwise - failure 1122 1123 @ref Backend 1124 **/ 1125 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1126 if (!op->op_assembled) { 1127 CeedOperatorAssemblyData data; 1128 1129 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1130 op->op_assembled = data; 1131 } 1132 *data = op->op_assembled; 1133 1134 return CEED_ERROR_SUCCESS; 1135 } 1136 1137 /** 1138 @brief Create object holding CeedOperator assembly data. 1139 1140 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1141 An array with references to the corresponding active CeedElemRestrictions is also stored. 1142 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1143 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1144 CeedEvalMode. 1145 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1146 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1147 1148 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1149 @param[in] op CeedOperator to be assembled 1150 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1151 1152 @return An error code: 0 - success, otherwise - failure 1153 1154 @ref Backend 1155 **/ 1156 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1157 CeedInt num_active_bases = 0; 1158 1159 // Allocate 1160 CeedCall(CeedCalloc(1, data)); 1161 (*data)->ceed = ceed; 1162 CeedCall(CeedReference(ceed)); 1163 1164 // Build OperatorAssembly data 1165 CeedQFunction qf; 1166 CeedQFunctionField *qf_fields; 1167 CeedOperatorField *op_fields; 1168 CeedInt num_input_fields; 1169 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1170 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1171 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1172 1173 // Determine active input basis 1174 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1175 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1176 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1177 for (CeedInt i = 0; i < num_input_fields; i++) { 1178 CeedVector vec; 1179 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1180 if (vec == CEED_VECTOR_ACTIVE) { 1181 CeedBasis basis_in = NULL; 1182 CeedEvalMode eval_mode; 1183 CeedInt index = -1, num_comp, q_comp; 1184 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1185 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1186 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1187 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1188 for (CeedInt i = 0; i < num_active_bases; i++) { 1189 if ((*data)->active_bases[i] == basis_in) index = i; 1190 } 1191 if (index == -1) { 1192 CeedElemRestriction elem_rstr_in; 1193 index = num_active_bases; 1194 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1195 (*data)->active_bases[num_active_bases] = NULL; 1196 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1197 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1198 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1199 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1200 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1201 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1202 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1203 num_eval_modes_in[index] = 0; 1204 num_eval_modes_out[index] = 0; 1205 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1206 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1207 eval_modes_in[index] = NULL; 1208 eval_modes_out[index] = NULL; 1209 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1210 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1211 eval_mode_offsets_in[index] = NULL; 1212 eval_mode_offsets_out[index] = NULL; 1213 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1214 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1215 (*data)->assembled_bases_in[index] = NULL; 1216 (*data)->assembled_bases_out[index] = NULL; 1217 num_active_bases++; 1218 } 1219 if (eval_mode != CEED_EVAL_WEIGHT) { 1220 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1221 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1222 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1223 for (CeedInt d = 0; d < q_comp; d++) { 1224 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1225 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1226 offset += num_comp; 1227 } 1228 num_eval_modes_in[index] += q_comp; 1229 } 1230 } 1231 } 1232 (*data)->num_eval_modes_in = num_eval_modes_in; 1233 (*data)->eval_modes_in = eval_modes_in; 1234 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1235 1236 // Determine active output basis 1237 CeedInt num_output_fields; 1238 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1239 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1240 offset = 0; 1241 for (CeedInt i = 0; i < num_output_fields; i++) { 1242 CeedVector vec; 1243 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1244 if (vec == CEED_VECTOR_ACTIVE) { 1245 CeedBasis basis_out = NULL; 1246 CeedEvalMode eval_mode; 1247 CeedInt index = -1, num_comp, q_comp; 1248 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1249 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1250 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1251 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1252 for (CeedInt i = 0; i < num_active_bases; i++) { 1253 if ((*data)->active_bases[i] == basis_out) index = i; 1254 } 1255 if (index == -1) { 1256 CeedElemRestriction elem_rstr_out; 1257 index = num_active_bases; 1258 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1259 (*data)->active_bases[num_active_bases] = NULL; 1260 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1261 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1262 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1263 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1264 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1265 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1266 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1267 num_eval_modes_in[index] = 0; 1268 num_eval_modes_out[index] = 0; 1269 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1270 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1271 eval_modes_in[index] = NULL; 1272 eval_modes_out[index] = NULL; 1273 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1274 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1275 eval_mode_offsets_in[index] = NULL; 1276 eval_mode_offsets_out[index] = NULL; 1277 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1278 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1279 (*data)->assembled_bases_in[index] = NULL; 1280 (*data)->assembled_bases_out[index] = NULL; 1281 num_active_bases++; 1282 } 1283 if (eval_mode != CEED_EVAL_WEIGHT) { 1284 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1285 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1286 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1287 for (CeedInt d = 0; d < q_comp; d++) { 1288 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1289 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1290 offset += num_comp; 1291 } 1292 num_eval_modes_out[index] += q_comp; 1293 } 1294 } 1295 } 1296 (*data)->num_output_components = offset; 1297 (*data)->num_eval_modes_out = num_eval_modes_out; 1298 (*data)->eval_modes_out = eval_modes_out; 1299 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1300 (*data)->num_active_bases = num_active_bases; 1301 1302 return CEED_ERROR_SUCCESS; 1303 } 1304 1305 /** 1306 @brief Get CeedOperator CeedEvalModes for assembly. 1307 1308 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1309 1310 @param[in] data CeedOperatorAssemblyData 1311 @param[out] num_active_bases Total number of active bases 1312 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1313 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1314 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1315 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1316 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1317 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1318 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1319 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1320 including contributions of all active bases 1321 1322 @return An error code: 0 - success, otherwise - failure 1323 1324 1325 @ref Backend 1326 **/ 1327 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1328 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1329 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1330 if (num_active_bases) *num_active_bases = data->num_active_bases; 1331 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1332 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1333 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1334 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1335 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1336 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1337 if (num_output_components) *num_output_components = data->num_output_components; 1338 1339 return CEED_ERROR_SUCCESS; 1340 } 1341 1342 /** 1343 @brief Get CeedOperator CeedBasis data for assembly. 1344 1345 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1346 1347 @param[in] data CeedOperatorAssemblyData 1348 @param[out] num_active_bases Number of active bases, or NULL 1349 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1350 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1351 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1352 1353 @return An error code: 0 - success, otherwise - failure 1354 1355 @ref Backend 1356 **/ 1357 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1358 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1359 // Assemble B_in, B_out if needed 1360 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1361 CeedInt num_qpts; 1362 1363 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1364 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1365 CeedInt num_nodes; 1366 CeedScalar *B_in = NULL, *identity = NULL; 1367 bool has_eval_none = false; 1368 1369 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1370 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1371 1372 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1373 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1374 } 1375 if (has_eval_none) { 1376 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1377 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1378 identity[i * num_nodes + i] = 1.0; 1379 } 1380 } 1381 1382 for (CeedInt q = 0; q < num_qpts; q++) { 1383 for (CeedInt n = 0; n < num_nodes; n++) { 1384 CeedInt d_in = 0, q_comp_in; 1385 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1386 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1387 const CeedInt qq = data->num_eval_modes_in[b] * q; 1388 const CeedScalar *B = NULL; 1389 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1390 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1391 if (q_comp_in > 1) { 1392 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1393 else B = &B[(++d_in) * num_qpts * num_nodes]; 1394 } 1395 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1396 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1397 } 1398 } 1399 } 1400 if (identity) CeedCall(CeedFree(&identity)); 1401 data->assembled_bases_in[b] = B_in; 1402 } 1403 } 1404 1405 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1406 CeedInt num_qpts; 1407 1408 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1409 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1410 CeedInt num_nodes; 1411 bool has_eval_none = false; 1412 CeedScalar *B_out = NULL, *identity = NULL; 1413 1414 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1415 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1416 1417 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1418 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1419 } 1420 if (has_eval_none) { 1421 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1422 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1423 identity[i * num_nodes + i] = 1.0; 1424 } 1425 } 1426 1427 for (CeedInt q = 0; q < num_qpts; q++) { 1428 for (CeedInt n = 0; n < num_nodes; n++) { 1429 CeedInt d_out = 0, q_comp_out; 1430 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1431 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1432 const CeedInt qq = data->num_eval_modes_out[b] * q; 1433 const CeedScalar *B = NULL; 1434 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1435 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1436 if (q_comp_out > 1) { 1437 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1438 else B = &B[(++d_out) * num_qpts * num_nodes]; 1439 } 1440 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1441 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1442 } 1443 } 1444 } 1445 if (identity) CeedCall(CeedFree(&identity)); 1446 data->assembled_bases_out[b] = B_out; 1447 } 1448 } 1449 1450 // Pass out assembled data 1451 if (active_bases) *active_bases = data->active_bases; 1452 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1453 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1454 1455 return CEED_ERROR_SUCCESS; 1456 } 1457 1458 /** 1459 @brief Get CeedOperator CeedBasis data for assembly. 1460 1461 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1462 1463 @param[in] data CeedOperatorAssemblyData 1464 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1465 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1466 1467 @return An error code: 0 - success, otherwise - failure 1468 1469 @ref Backend 1470 **/ 1471 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1472 CeedElemRestriction **active_elem_rstrs) { 1473 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1474 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1475 1476 return CEED_ERROR_SUCCESS; 1477 } 1478 1479 /** 1480 @brief Destroy CeedOperatorAssemblyData 1481 1482 @param[in,out] data CeedOperatorAssemblyData to destroy 1483 1484 @return An error code: 0 - success, otherwise - failure 1485 1486 @ref Backend 1487 **/ 1488 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1489 if (!*data) { 1490 *data = NULL; 1491 return CEED_ERROR_SUCCESS; 1492 } 1493 CeedCall(CeedDestroy(&(*data)->ceed)); 1494 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1495 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1496 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1497 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1498 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1499 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1500 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1501 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1502 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1503 } 1504 CeedCall(CeedFree(&(*data)->active_bases)); 1505 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1506 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1507 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1508 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1509 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1510 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1511 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1512 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1513 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1514 1515 CeedCall(CeedFree(data)); 1516 return CEED_ERROR_SUCCESS; 1517 } 1518 1519 /// @} 1520 1521 /// ---------------------------------------------------------------------------- 1522 /// CeedOperator Public API 1523 /// ---------------------------------------------------------------------------- 1524 /// @addtogroup CeedOperatorUser 1525 /// @{ 1526 1527 /** 1528 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1529 1530 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1531 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1532 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1533 1534 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1535 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1536 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1537 1538 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1539 1540 @param[in] op CeedOperator to assemble CeedQFunction 1541 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1542 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1543 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1544 1545 @return An error code: 0 - success, otherwise - failure 1546 1547 @ref User 1548 **/ 1549 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1550 CeedCall(CeedOperatorCheckReady(op)); 1551 1552 if (op->LinearAssembleQFunction) { 1553 // Backend version 1554 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1555 } else { 1556 // Operator fallback 1557 CeedOperator op_fallback; 1558 1559 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1560 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1561 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1562 } 1563 return CEED_ERROR_SUCCESS; 1564 } 1565 1566 /** 1567 @brief Assemble CeedQFunction and store result internally. 1568 1569 Return copied references of stored data to the caller. 1570 Caller is responsible for ownership and destruction of the copied references. 1571 See also @ref CeedOperatorLinearAssembleQFunction 1572 1573 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1574 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1575 1576 @param[in] op CeedOperator to assemble CeedQFunction 1577 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1578 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1579 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1580 1581 @return An error code: 0 - success, otherwise - failure 1582 1583 @ref User 1584 **/ 1585 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1586 int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL; 1587 CeedOperator op_assemble = NULL; 1588 1589 CeedCall(CeedOperatorCheckReady(op)); 1590 1591 // Determine if fallback parent or operator has implementation 1592 if (op->op_fallback_parent && op->op_fallback_parent->LinearAssembleQFunctionUpdate) { 1593 // -- Backend version for op fallback parent is faster, if it exists 1594 LinearAssembleQFunctionUpdate = op->op_fallback_parent->LinearAssembleQFunctionUpdate; 1595 op_assemble = op->op_fallback_parent; 1596 } else if (op->LinearAssembleQFunctionUpdate) { 1597 // -- Backend version for op 1598 LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate; 1599 op_assemble = op; 1600 } 1601 1602 // Assemble QFunction 1603 if (LinearAssembleQFunctionUpdate) { 1604 // Backend or fallback parent version 1605 bool qf_assembled_is_setup; 1606 CeedVector assembled_vec = NULL; 1607 CeedElemRestriction assembled_rstr = NULL; 1608 1609 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1610 if (qf_assembled_is_setup) { 1611 bool update_needed; 1612 1613 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1614 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1615 if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request)); 1616 } else { 1617 CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request)); 1618 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1619 } 1620 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1621 1622 // Copy reference from internally held copy 1623 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1624 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1625 CeedCall(CeedVectorDestroy(&assembled_vec)); 1626 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1627 } else { 1628 // Operator fallback 1629 CeedOperator op_fallback; 1630 1631 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1632 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1633 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1634 } 1635 1636 return CEED_ERROR_SUCCESS; 1637 } 1638 1639 /** 1640 @brief Assemble the diagonal of a square linear CeedOperator 1641 1642 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1643 1644 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1645 1646 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1647 1648 @param[in] op CeedOperator to assemble CeedQFunction 1649 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1650 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1651 1652 @return An error code: 0 - success, otherwise - failure 1653 1654 @ref User 1655 **/ 1656 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1657 bool is_composite; 1658 CeedCall(CeedOperatorCheckReady(op)); 1659 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1660 1661 CeedSize input_size = 0, output_size = 0; 1662 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1663 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1664 1665 // Early exit for empty operator 1666 if (!is_composite) { 1667 CeedInt num_elem = 0; 1668 1669 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1670 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1671 } 1672 1673 if (op->LinearAssembleDiagonal) { 1674 // Backend version 1675 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1676 return CEED_ERROR_SUCCESS; 1677 } else if (op->LinearAssembleAddDiagonal) { 1678 // Backend version with zeroing first 1679 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1680 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1681 return CEED_ERROR_SUCCESS; 1682 } else { 1683 // Operator fallback 1684 CeedOperator op_fallback; 1685 1686 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1687 if (op_fallback) { 1688 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1689 return CEED_ERROR_SUCCESS; 1690 } 1691 } 1692 // Default interface implementation 1693 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1694 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1695 1696 return CEED_ERROR_SUCCESS; 1697 } 1698 1699 /** 1700 @brief Assemble the diagonal of a square linear CeedOperator 1701 1702 This sums into a CeedVector the diagonal of a linear CeedOperator. 1703 1704 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1705 1706 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1707 1708 @param[in] op CeedOperator to assemble CeedQFunction 1709 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1710 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1711 1712 @return An error code: 0 - success, otherwise - failure 1713 1714 @ref User 1715 **/ 1716 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1717 bool is_composite; 1718 CeedCall(CeedOperatorCheckReady(op)); 1719 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1720 1721 CeedSize input_size = 0, output_size = 0; 1722 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1723 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1724 1725 // Early exit for empty operator 1726 if (!is_composite) { 1727 CeedInt num_elem = 0; 1728 1729 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1730 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1731 } 1732 1733 if (op->LinearAssembleAddDiagonal) { 1734 // Backend version 1735 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1736 return CEED_ERROR_SUCCESS; 1737 } else { 1738 // Operator fallback 1739 CeedOperator op_fallback; 1740 1741 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1742 if (op_fallback) { 1743 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1744 return CEED_ERROR_SUCCESS; 1745 } 1746 } 1747 // Default interface implementation 1748 if (is_composite) { 1749 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1750 } else { 1751 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1752 } 1753 1754 return CEED_ERROR_SUCCESS; 1755 } 1756 1757 /** 1758 @brief Assemble the point block diagonal of a square linear CeedOperator 1759 1760 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1761 1762 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1763 1764 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1765 1766 @param[in] op CeedOperator to assemble CeedQFunction 1767 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1768 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1769 component in]. 1770 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1771 1772 @return An error code: 0 - success, otherwise - failure 1773 1774 @ref User 1775 **/ 1776 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1777 bool is_composite; 1778 CeedCall(CeedOperatorCheckReady(op)); 1779 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1780 1781 CeedSize input_size = 0, output_size = 0; 1782 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1783 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1784 1785 // Early exit for empty operator 1786 if (!is_composite) { 1787 CeedInt num_elem = 0; 1788 1789 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1790 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1791 } 1792 1793 if (op->LinearAssemblePointBlockDiagonal) { 1794 // Backend version 1795 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1796 return CEED_ERROR_SUCCESS; 1797 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1798 // Backend version with zeroing first 1799 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1800 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1801 return CEED_ERROR_SUCCESS; 1802 } else { 1803 // Operator fallback 1804 CeedOperator op_fallback; 1805 1806 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1807 if (op_fallback) { 1808 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1809 return CEED_ERROR_SUCCESS; 1810 } 1811 } 1812 // Default interface implementation 1813 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1814 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1815 1816 return CEED_ERROR_SUCCESS; 1817 } 1818 1819 /** 1820 @brief Assemble the point block diagonal of a square linear CeedOperator 1821 1822 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1823 1824 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1825 1826 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1827 1828 @param[in] op CeedOperator to assemble CeedQFunction 1829 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1830 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1831 component in]. 1832 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1833 1834 @return An error code: 0 - success, otherwise - failure 1835 1836 @ref User 1837 **/ 1838 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1839 bool is_composite; 1840 CeedCall(CeedOperatorCheckReady(op)); 1841 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1842 1843 CeedSize input_size = 0, output_size = 0; 1844 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1845 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1846 1847 // Early exit for empty operator 1848 if (!is_composite) { 1849 CeedInt num_elem = 0; 1850 1851 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1852 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1853 } 1854 1855 if (op->LinearAssembleAddPointBlockDiagonal) { 1856 // Backend version 1857 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1858 return CEED_ERROR_SUCCESS; 1859 } else { 1860 // Operator fallback 1861 CeedOperator op_fallback; 1862 1863 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1864 if (op_fallback) { 1865 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1866 return CEED_ERROR_SUCCESS; 1867 } 1868 } 1869 // Default interface implementation 1870 if (is_composite) { 1871 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1872 } else { 1873 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1874 } 1875 1876 return CEED_ERROR_SUCCESS; 1877 } 1878 1879 /** 1880 @brief Fully assemble the nonzero pattern of a linear operator. 1881 1882 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1883 1884 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1885 matrix in entry (i, j). 1886 Note that the (i, j) pairs are not unique and may repeat. 1887 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1888 1889 This will generally be slow unless your operator is low-order. 1890 1891 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1892 1893 @param[in] op CeedOperator to assemble 1894 @param[out] num_entries Number of entries in coordinate nonzero pattern 1895 @param[out] rows Row number for each entry 1896 @param[out] cols Column number for each entry 1897 1898 @ref User 1899 **/ 1900 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1901 CeedInt num_suboperators; 1902 CeedSize single_entries; 1903 CeedOperator *sub_operators; 1904 bool is_composite; 1905 CeedCall(CeedOperatorCheckReady(op)); 1906 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1907 1908 if (op->LinearAssembleSymbolic) { 1909 // Backend version 1910 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1911 return CEED_ERROR_SUCCESS; 1912 } else { 1913 // Operator fallback 1914 CeedOperator op_fallback; 1915 1916 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1917 if (op_fallback) { 1918 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1919 return CEED_ERROR_SUCCESS; 1920 } 1921 } 1922 1923 // Default interface implementation 1924 1925 // count entries and allocate rows, cols arrays 1926 *num_entries = 0; 1927 if (is_composite) { 1928 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1929 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1930 for (CeedInt k = 0; k < num_suboperators; ++k) { 1931 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1932 *num_entries += single_entries; 1933 } 1934 } else { 1935 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1936 *num_entries += single_entries; 1937 } 1938 CeedCall(CeedCalloc(*num_entries, rows)); 1939 CeedCall(CeedCalloc(*num_entries, cols)); 1940 1941 // assemble nonzero locations 1942 CeedInt offset = 0; 1943 if (is_composite) { 1944 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1945 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1946 for (CeedInt k = 0; k < num_suboperators; ++k) { 1947 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1948 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1949 offset += single_entries; 1950 } 1951 } else { 1952 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1953 } 1954 1955 return CEED_ERROR_SUCCESS; 1956 } 1957 1958 /** 1959 @brief Fully assemble the nonzero entries of a linear operator. 1960 1961 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1962 1963 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1964 matrix in entry (i, j). 1965 Note that the (i, j) pairs are not unique and may repeat. 1966 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1967 1968 This will generally be slow unless your operator is low-order. 1969 1970 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1971 1972 @param[in] op CeedOperator to assemble 1973 @param[out] values Values to assemble into matrix 1974 1975 @ref User 1976 **/ 1977 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1978 CeedInt num_suboperators; 1979 CeedSize single_entries = 0; 1980 CeedOperator *sub_operators; 1981 bool is_composite; 1982 CeedCall(CeedOperatorCheckReady(op)); 1983 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1984 1985 // Early exit for empty operator 1986 if (!is_composite) { 1987 CeedInt num_elem = 0; 1988 1989 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1990 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1991 } 1992 1993 if (op->LinearAssemble) { 1994 // Backend version 1995 CeedCall(op->LinearAssemble(op, values)); 1996 return CEED_ERROR_SUCCESS; 1997 } else { 1998 // Operator fallback 1999 CeedOperator op_fallback; 2000 2001 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2002 if (op_fallback) { 2003 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2004 return CEED_ERROR_SUCCESS; 2005 } 2006 } 2007 2008 // Default interface implementation 2009 CeedInt offset = 0; 2010 CeedCall(CeedVectorSetValue(values, 0.0)); 2011 if (is_composite) { 2012 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2013 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2014 for (CeedInt k = 0; k < num_suboperators; k++) { 2015 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2016 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2017 offset += single_entries; 2018 } 2019 } else { 2020 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2021 } 2022 2023 return CEED_ERROR_SUCCESS; 2024 } 2025 2026 /** 2027 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2028 2029 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2030 2031 @param[in] op Composite CeedOperator 2032 @param[in] num_skip_indices Number of suboperators to skip 2033 @param[in] skip_indices Array of indices of suboperators to skip 2034 @param[out] mult Vector to store multiplicity (of size l_size) 2035 2036 @return An error code: 0 - success, otherwise - failure 2037 2038 @ref User 2039 **/ 2040 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2041 CeedCall(CeedOperatorCheckReady(op)); 2042 2043 Ceed ceed; 2044 CeedInt num_suboperators; 2045 CeedSize l_vec_len; 2046 CeedScalar *mult_array; 2047 CeedVector ones_l_vec; 2048 CeedElemRestriction elem_rstr, mult_elem_rstr; 2049 CeedOperator *sub_operators; 2050 2051 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2052 2053 // Zero mult vector 2054 CeedCall(CeedVectorSetValue(mult, 0.0)); 2055 2056 // Get suboperators 2057 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2058 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2059 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2060 2061 // Work vector 2062 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2063 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2064 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2065 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2066 2067 // Compute multiplicity across suboperators 2068 for (CeedInt i = 0; i < num_suboperators; i++) { 2069 const CeedScalar *sub_mult_array; 2070 CeedVector sub_mult_l_vec, ones_e_vec; 2071 2072 // -- Check for suboperator to skip 2073 for (CeedInt j = 0; j < num_skip_indices; j++) { 2074 if (skip_indices[j] == i) continue; 2075 } 2076 2077 // -- Sub operator multiplicity 2078 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2079 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); 2080 CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2081 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2082 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2083 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2084 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2085 // ---- Flag every node present in the current suboperator 2086 for (CeedInt j = 0; j < l_vec_len; j++) { 2087 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2088 } 2089 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2090 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2091 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2092 CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr)); 2093 } 2094 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2095 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2096 2097 return CEED_ERROR_SUCCESS; 2098 } 2099 2100 /** 2101 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2102 grid interpolation 2103 2104 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2105 2106 @param[in] op_fine Fine grid operator 2107 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2108 @param[in] rstr_coarse Coarse grid restriction 2109 @param[in] basis_coarse Coarse grid active vector basis 2110 @param[out] op_coarse Coarse grid operator 2111 @param[out] op_prolong Coarse to fine operator, or NULL 2112 @param[out] op_restrict Fine to coarse operator, or NULL 2113 2114 @return An error code: 0 - success, otherwise - failure 2115 2116 @ref User 2117 **/ 2118 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2119 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2120 CeedCall(CeedOperatorCheckReady(op_fine)); 2121 2122 // Build prolongation matrix, if required 2123 CeedBasis basis_c_to_f = NULL; 2124 if (op_prolong || op_restrict) { 2125 CeedBasis basis_fine; 2126 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2127 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2128 } 2129 2130 // Core code 2131 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2132 2133 return CEED_ERROR_SUCCESS; 2134 } 2135 2136 /** 2137 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2138 2139 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2140 2141 @param[in] op_fine Fine grid operator 2142 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2143 @param[in] rstr_coarse Coarse grid restriction 2144 @param[in] basis_coarse Coarse grid active vector basis 2145 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2146 @param[out] op_coarse Coarse grid operator 2147 @param[out] op_prolong Coarse to fine operator, or NULL 2148 @param[out] op_restrict Fine to coarse operator, or NULL 2149 2150 @return An error code: 0 - success, otherwise - failure 2151 2152 @ref User 2153 **/ 2154 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2155 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2156 CeedOperator *op_restrict) { 2157 CeedCall(CeedOperatorCheckReady(op_fine)); 2158 Ceed ceed; 2159 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2160 2161 // Check for compatible quadrature spaces 2162 CeedBasis basis_fine; 2163 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2164 CeedInt Q_f, Q_c; 2165 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2166 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2167 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2168 2169 // Create coarse to fine basis, if required 2170 CeedBasis basis_c_to_f = NULL; 2171 if (op_prolong || op_restrict) { 2172 // Check if interpolation matrix is provided 2173 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2174 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2175 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2176 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2177 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2178 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2179 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2180 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2181 CeedScalar *q_ref, *q_weight, *grad; 2182 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2183 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2184 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2185 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2186 CeedCall(CeedFree(&q_ref)); 2187 CeedCall(CeedFree(&q_weight)); 2188 CeedCall(CeedFree(&grad)); 2189 } 2190 2191 // Core code 2192 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2193 return CEED_ERROR_SUCCESS; 2194 } 2195 2196 /** 2197 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2198 2199 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2200 2201 @param[in] op_fine Fine grid operator 2202 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2203 @param[in] rstr_coarse Coarse grid restriction 2204 @param[in] basis_coarse Coarse grid active vector basis 2205 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2206 @param[out] op_coarse Coarse grid operator 2207 @param[out] op_prolong Coarse to fine operator, or NULL 2208 @param[out] op_restrict Fine to coarse operator, or NULL 2209 2210 @return An error code: 0 - success, otherwise - failure 2211 2212 @ref User 2213 **/ 2214 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2215 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2216 CeedOperator *op_restrict) { 2217 CeedCall(CeedOperatorCheckReady(op_fine)); 2218 Ceed ceed; 2219 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2220 2221 // Check for compatible quadrature spaces 2222 CeedBasis basis_fine; 2223 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2224 CeedInt Q_f, Q_c; 2225 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2226 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2227 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2228 2229 // Coarse to fine basis 2230 CeedBasis basis_c_to_f = NULL; 2231 if (op_prolong || op_restrict) { 2232 // Check if interpolation matrix is provided 2233 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2234 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2235 CeedElemTopology topo; 2236 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2237 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2238 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2239 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2240 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2241 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2242 CeedScalar *q_ref, *q_weight, *grad; 2243 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2244 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2245 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2246 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2247 CeedCall(CeedFree(&q_ref)); 2248 CeedCall(CeedFree(&q_weight)); 2249 CeedCall(CeedFree(&grad)); 2250 } 2251 2252 // Core code 2253 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2254 return CEED_ERROR_SUCCESS; 2255 } 2256 2257 /** 2258 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2259 2260 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2261 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2262 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2263 \hat S V\f$. 2264 The CeedOperator must be linear and non-composite. 2265 The associated CeedQFunction must therefore also be linear. 2266 2267 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2268 2269 @param[in] op CeedOperator to create element inverses 2270 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2271 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2272 2273 @return An error code: 0 - success, otherwise - failure 2274 2275 @ref User 2276 **/ 2277 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2278 CeedCall(CeedOperatorCheckReady(op)); 2279 2280 if (op->CreateFDMElementInverse) { 2281 // Backend version 2282 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2283 return CEED_ERROR_SUCCESS; 2284 } else { 2285 // Operator fallback 2286 CeedOperator op_fallback; 2287 2288 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2289 if (op_fallback) { 2290 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2291 return CEED_ERROR_SUCCESS; 2292 } 2293 } 2294 2295 // Default interface implementation 2296 Ceed ceed, ceed_parent; 2297 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2298 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2299 ceed_parent = ceed_parent ? ceed_parent : ceed; 2300 CeedQFunction qf; 2301 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2302 2303 // Determine active input basis 2304 bool interp = false, grad = false; 2305 CeedBasis basis = NULL; 2306 CeedElemRestriction rstr = NULL; 2307 CeedOperatorField *op_fields; 2308 CeedQFunctionField *qf_fields; 2309 CeedInt num_input_fields; 2310 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2311 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2312 for (CeedInt i = 0; i < num_input_fields; i++) { 2313 CeedVector vec; 2314 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2315 if (vec == CEED_VECTOR_ACTIVE) { 2316 CeedEvalMode eval_mode; 2317 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2318 interp = interp || eval_mode == CEED_EVAL_INTERP; 2319 grad = grad || eval_mode == CEED_EVAL_GRAD; 2320 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2321 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2322 } 2323 } 2324 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2325 CeedSize l_size = 1; 2326 CeedInt P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2327 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2328 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2329 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2330 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2331 CeedCall(CeedBasisGetDimension(basis, &dim)); 2332 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2333 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2334 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2335 2336 // Build and diagonalize 1D Mass and Laplacian 2337 bool is_tensor_basis; 2338 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2339 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2340 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2341 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2342 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2343 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2344 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2345 CeedCall(CeedCalloc(P_1d, &lambda)); 2346 // -- Build matrices 2347 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2348 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2349 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2350 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2351 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2352 2353 // -- Diagonalize 2354 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2355 CeedCall(CeedFree(&mass)); 2356 CeedCall(CeedFree(&laplace)); 2357 for (CeedInt i = 0; i < P_1d; i++) { 2358 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2359 } 2360 CeedCall(CeedFree(&x)); 2361 2362 // Assemble QFunction 2363 CeedVector assembled = NULL; 2364 CeedElemRestriction rstr_qf = NULL; 2365 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2366 CeedInt layout[3]; 2367 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2368 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2369 CeedScalar max_norm = 0; 2370 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2371 2372 // Calculate element averages 2373 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2374 CeedScalar *elem_avg; 2375 const CeedScalar *assembled_array, *q_weight_array; 2376 CeedVector q_weight; 2377 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2378 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2379 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2380 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2381 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2382 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2383 for (CeedInt e = 0; e < num_elem; e++) { 2384 CeedInt count = 0; 2385 for (CeedInt q = 0; q < num_qpts; q++) { 2386 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2387 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2388 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2389 count++; 2390 } 2391 } 2392 } 2393 if (count) { 2394 elem_avg[e] /= count; 2395 } else { 2396 elem_avg[e] = 1.0; 2397 } 2398 } 2399 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2400 CeedCall(CeedVectorDestroy(&assembled)); 2401 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2402 CeedCall(CeedVectorDestroy(&q_weight)); 2403 2404 // Build FDM diagonal 2405 CeedVector q_data; 2406 CeedScalar *q_data_array, *fdm_diagonal; 2407 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2408 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2409 for (CeedInt c = 0; c < num_comp; c++) { 2410 for (CeedInt n = 0; n < num_nodes; n++) { 2411 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2412 if (grad) { 2413 for (CeedInt d = 0; d < dim; d++) { 2414 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2415 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2416 } 2417 } 2418 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2419 } 2420 } 2421 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2422 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2423 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2424 for (CeedInt e = 0; e < num_elem; e++) { 2425 for (CeedInt c = 0; c < num_comp; c++) { 2426 for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2427 } 2428 } 2429 CeedCall(CeedFree(&elem_avg)); 2430 CeedCall(CeedFree(&fdm_diagonal)); 2431 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2432 2433 // Setup FDM operator 2434 // -- Basis 2435 CeedBasis fdm_basis; 2436 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2437 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2438 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2439 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2440 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2441 CeedCall(CeedFree(&fdm_interp)); 2442 CeedCall(CeedFree(&grad_dummy)); 2443 CeedCall(CeedFree(&q_ref_dummy)); 2444 CeedCall(CeedFree(&q_weight_dummy)); 2445 CeedCall(CeedFree(&lambda)); 2446 2447 // -- Restriction 2448 CeedElemRestriction rstr_qd_i; 2449 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2450 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2451 // -- QFunction 2452 CeedQFunction qf_fdm; 2453 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2454 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2455 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2456 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2457 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2458 // -- QFunction context 2459 CeedInt *num_comp_data; 2460 CeedCall(CeedCalloc(1, &num_comp_data)); 2461 num_comp_data[0] = num_comp; 2462 CeedQFunctionContext ctx_fdm; 2463 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2464 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2465 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2466 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2467 // -- Operator 2468 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2469 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2470 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2471 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2472 2473 // Cleanup 2474 CeedCall(CeedVectorDestroy(&q_data)); 2475 CeedCall(CeedBasisDestroy(&fdm_basis)); 2476 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2477 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2478 2479 return CEED_ERROR_SUCCESS; 2480 } 2481 2482 /// @} 2483