1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 char *source_path_with_name = NULL; 39 40 // Check if NULL qf passed in 41 if (!qf) return CEED_ERROR_SUCCESS; 42 43 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 44 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 45 46 if (qf->source_path) { 47 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 48 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 49 memcpy(source_path_with_name, qf->source_path, path_len); 50 memcpy(&source_path_with_name[path_len], ":", 1); 51 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 52 } else { 53 CeedCall(CeedCalloc(1, &source_path_with_name)); 54 } 55 56 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 57 { 58 CeedQFunctionContext ctx; 59 60 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 61 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 62 } 63 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 64 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 65 } 66 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 67 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 68 } 69 CeedCall(CeedFree(&source_path_with_name)); 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 Ceed ceed_fallback; 84 bool is_composite; 85 CeedOperator op_fallback; 86 87 // Check not already created 88 if (op->op_fallback) return CEED_ERROR_SUCCESS; 89 90 // Fallback Ceed 91 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 92 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 93 94 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 95 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 96 97 // Clone Op 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 117 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 118 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 119 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 120 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 121 op->input_fields[i]->vec)); 122 } 123 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 124 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 125 op->output_fields[i]->vec)); 126 } 127 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 128 if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 // Cleanup 130 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 131 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 133 } 134 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 135 CeedCall(CeedOperatorCheckReady(op_fallback)); 136 // Note: No ref-counting here so we don't get caught in a reference loop. 137 // The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback. 138 op->op_fallback = op_fallback; 139 op_fallback->op_fallback_parent = op; 140 return CEED_ERROR_SUCCESS; 141 } 142 143 /** 144 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 145 146 @param[in] op CeedOperator to retrieve fallback for 147 @param[out] op_fallback Fallback CeedOperator 148 149 @return An error code: 0 - success, otherwise - failure 150 151 @ref Developer 152 **/ 153 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 154 // Create if needed 155 if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op)); 156 if (op->op_fallback) { 157 bool is_debug; 158 159 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 160 if (is_debug) { 161 Ceed ceed, ceed_fallback; 162 const char *resource, *resource_fallback; 163 164 CeedCall(CeedOperatorGetCeed(op, &ceed)); 165 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 166 CeedCall(CeedGetResource(ceed, &resource)); 167 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 168 169 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 170 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 171 op->op_fallback); 172 } 173 } 174 *op_fallback = op->op_fallback; 175 return CEED_ERROR_SUCCESS; 176 } 177 178 /** 179 @brief Select correct basis matrix pointer based on CeedEvalMode 180 181 @param[in] basis CeedBasis from which to get the basis matrix 182 @param[in] eval_mode Current basis evaluation mode 183 @param[in] identity Pointer to identity matrix 184 @param[out] basis_ptr Basis pointer to set 185 186 @ref Developer 187 **/ 188 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 189 switch (eval_mode) { 190 case CEED_EVAL_NONE: 191 *basis_ptr = identity; 192 break; 193 case CEED_EVAL_INTERP: 194 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 195 break; 196 case CEED_EVAL_GRAD: 197 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 198 break; 199 case CEED_EVAL_DIV: 200 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 201 break; 202 case CEED_EVAL_CURL: 203 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 204 break; 205 case CEED_EVAL_WEIGHT: 206 break; // Caught by QF Assembly 207 } 208 assert(*basis_ptr != NULL); 209 return CEED_ERROR_SUCCESS; 210 } 211 212 /** 213 @brief Create point block restriction for active operator field 214 215 @param[in] rstr Original CeedElemRestriction for active field 216 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 217 218 @return An error code: 0 - success, otherwise - failure 219 220 @ref Developer 221 **/ 222 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 223 Ceed ceed; 224 CeedInt num_elem, num_comp, shift, elem_size, comp_stride, *pointblock_offsets; 225 CeedSize l_size; 226 const CeedInt *offsets; 227 228 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 229 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 230 231 // Expand offsets 232 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 233 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 234 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 235 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 236 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 237 shift = num_comp; 238 if (comp_stride != 1) shift *= num_comp; 239 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 240 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 241 pointblock_offsets[i] = offsets[i] * shift; 242 } 243 244 // Create new restriction 245 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 246 pointblock_offsets, pointblock_rstr)); 247 248 // Cleanup 249 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 250 return CEED_ERROR_SUCCESS; 251 } 252 253 /** 254 @brief Core logic for assembling operator diagonal or point block diagonal 255 256 @param[in] op CeedOperator to assemble point block diagonal 257 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 258 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 259 @param[out] assembled CeedVector to store assembled diagonal 260 261 @return An error code: 0 - success, otherwise - failure 262 263 @ref Developer 264 **/ 265 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 266 Ceed ceed; 267 CeedInt num_input_fields, num_output_fields; 268 CeedInt layout[3]; 269 const CeedScalar *assembled_qf_array; 270 CeedVector assembled_qf = NULL; 271 CeedElemRestriction assembled_elem_rstr = NULL; 272 CeedQFunction qf; 273 274 CeedCall(CeedOperatorGetCeed(op, &ceed)); 275 276 // Assemble QFunction 277 CeedCall(CeedOperatorGetQFunction(op, &qf)); 278 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 279 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 280 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 281 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 282 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 283 284 // Get assembly data 285 const CeedEvalMode **eval_modes_in, **eval_modes_out; 286 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 287 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 288 CeedElemRestriction *active_elem_rstrs; 289 CeedBasis *active_bases; 290 CeedOperatorAssemblyData data; 291 292 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 293 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 294 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 295 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 296 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 297 298 // Loop over all active bases 299 for (CeedInt b = 0; b < num_active_bases; b++) { 300 bool has_eval_none = false; 301 CeedInt num_elem, num_nodes, num_qpts, num_components; 302 CeedScalar *elem_diag_array, *identity = NULL; 303 CeedVector elem_diag; 304 CeedElemRestriction diag_elem_rstr; 305 306 // Assemble point block diagonal restriction, if needed 307 if (is_pointblock) { 308 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs[b], &diag_elem_rstr)); 309 } else { 310 CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs[b], &diag_elem_rstr)); 311 } 312 313 // Create diagonal vector 314 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 315 316 // Assemble element operator diagonals 317 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 318 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 319 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 320 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 321 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 322 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 323 324 // Construct identity matrix for basis if required 325 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 326 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 327 } 328 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 329 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 330 } 331 if (has_eval_none) { 332 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 333 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 334 } 335 336 // Compute the diagonal of B^T D B 337 // Each element 338 for (CeedSize e = 0; e < num_elem; e++) { 339 // Each basis eval mode pair 340 CeedInt d_out = 0, q_comp_out; 341 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 342 343 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 344 CeedInt d_in = 0, q_comp_in; 345 const CeedScalar *B_t = NULL; 346 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 347 348 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 349 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 350 if (q_comp_out > 1) { 351 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 352 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 353 } 354 eval_mode_out_prev = eval_modes_out[b][e_out]; 355 356 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 357 const CeedScalar *B = NULL; 358 359 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 360 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 361 if (q_comp_in > 1) { 362 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 363 else B = &B[(++d_in) * num_qpts * num_nodes]; 364 } 365 eval_mode_in_prev = eval_modes_in[b][e_in]; 366 367 // Each component 368 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 369 // Each qpt/node pair 370 for (CeedInt q = 0; q < num_qpts; q++) { 371 if (is_pointblock) { 372 // Point Block Diagonal 373 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 374 const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 375 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 376 377 for (CeedInt n = 0; n < num_nodes; n++) { 378 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 379 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 380 } 381 } 382 } else { 383 // Diagonal Only 384 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 385 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 386 387 for (CeedInt n = 0; n < num_nodes; n++) { 388 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 389 } 390 } 391 } 392 } 393 } 394 } 395 } 396 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 397 398 // Assemble local operator diagonal 399 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 400 401 // Cleanup 402 CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 403 CeedCall(CeedVectorDestroy(&elem_diag)); 404 CeedCall(CeedFree(&identity)); 405 } 406 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 407 CeedCall(CeedVectorDestroy(&assembled_qf)); 408 return CEED_ERROR_SUCCESS; 409 } 410 411 /** 412 @brief Core logic for assembling composite operator diagonal 413 414 @param[in] op CeedOperator to assemble point block diagonal 415 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 416 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 417 @param[out] assembled CeedVector to store assembled diagonal 418 419 @return An error code: 0 - success, otherwise - failure 420 421 @ref Developer 422 **/ 423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 424 CeedVector assembled) { 425 CeedInt num_sub; 426 CeedOperator *suboperators; 427 428 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 429 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 430 for (CeedInt i = 0; i < num_sub; i++) { 431 if (is_pointblock) { 432 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 433 } else { 434 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 435 } 436 } 437 return CEED_ERROR_SUCCESS; 438 } 439 440 /** 441 @brief Build nonzero pattern for non-composite operator 442 443 Users should generally use CeedOperatorLinearAssembleSymbolic() 444 445 @param[in] op CeedOperator to assemble nonzero pattern 446 @param[in] offset Offset for number of entries 447 @param[out] rows Row number for each entry 448 @param[out] cols Column number for each entry 449 450 @return An error code: 0 - success, otherwise - failure 451 452 @ref Developer 453 **/ 454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 455 Ceed ceed; 456 bool is_composite; 457 CeedInt num_elem, elem_size, num_comp, layout_er[3], local_num_entries; 458 CeedSize num_nodes, count = 0; 459 CeedScalar *array; 460 const CeedScalar *elem_dof_a; 461 CeedVector index_vec, elem_dof; 462 CeedElemRestriction active_rstr, index_elem_rstr; 463 464 CeedCall(CeedOperatorGetCeed(op, &ceed)); 465 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 466 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 467 468 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 469 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 470 CeedCall(CeedElemRestrictionCreateUnorientedCopy(active_rstr, &index_elem_rstr)); 471 CeedCall(CeedElemRestrictionGetNumElements(index_elem_rstr, &num_elem)); 472 CeedCall(CeedElemRestrictionGetElementSize(index_elem_rstr, &elem_size)); 473 CeedCall(CeedElemRestrictionGetNumComponents(index_elem_rstr, &num_comp)); 474 CeedCall(CeedElemRestrictionGetELayout(index_elem_rstr, &layout_er)); 475 local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 476 477 // Determine elem_dof relation 478 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 479 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 480 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 481 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 482 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 483 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 484 CeedCall(CeedElemRestrictionApply(index_elem_rstr, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 485 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 486 CeedCall(CeedVectorDestroy(&index_vec)); 487 488 // Determine i, j locations for element matrices 489 for (CeedInt e = 0; e < num_elem; e++) { 490 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 491 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 492 for (CeedInt i = 0; i < elem_size; i++) { 493 for (CeedInt j = 0; j < elem_size; j++) { 494 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 495 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 496 const CeedInt row = elem_dof_a[elem_dof_index_row]; 497 const CeedInt col = elem_dof_a[elem_dof_index_col]; 498 499 rows[offset + count] = row; 500 cols[offset + count] = col; 501 count++; 502 } 503 } 504 } 505 } 506 } 507 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 508 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 509 CeedCall(CeedVectorDestroy(&elem_dof)); 510 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr)); 511 return CEED_ERROR_SUCCESS; 512 } 513 514 /** 515 @brief Assemble nonzero entries for non-composite operator 516 517 Users should generally use CeedOperatorLinearAssemble() 518 519 @param[in] op CeedOperator to assemble 520 @param[in] offset Offset for number of entries 521 @param[out] values Values to assemble into matrix 522 523 @return An error code: 0 - success, otherwise - failure 524 525 @ref Developer 526 **/ 527 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 528 Ceed ceed; 529 bool is_composite; 530 531 CeedCall(CeedOperatorGetCeed(op, &ceed)); 532 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 533 534 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 535 536 // Early exit for empty operator 537 { 538 CeedInt num_elem = 0; 539 540 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 541 if (num_elem == 0) return CEED_ERROR_SUCCESS; 542 } 543 544 if (op->LinearAssembleSingle) { 545 // Backend version 546 CeedCall(op->LinearAssembleSingle(op, offset, values)); 547 return CEED_ERROR_SUCCESS; 548 } else { 549 // Operator fallback 550 CeedOperator op_fallback; 551 552 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 553 if (op_fallback) { 554 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 555 return CEED_ERROR_SUCCESS; 556 } 557 } 558 559 // Assemble QFunction 560 const bool *orients = NULL; 561 const CeedInt8 *curl_orients = NULL; 562 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases, num_input_fields, num_output_fields, num_elem, elem_size, num_qpts, num_comp, 563 local_num_entries, layout_qf[3]; 564 const CeedScalar *assembled_qf_array; 565 CeedVector assembled_qf = NULL; 566 CeedRestrictionType rstr_type; 567 CeedElemRestriction rstr_q = NULL, active_rstr; 568 const CeedEvalMode **eval_modes_in, **eval_modes_out; 569 CeedBasis *bases, basis_in; 570 CeedQFunction qf; 571 CeedOperatorAssemblyData data; 572 CeedOperatorField *input_fields, *output_fields; 573 574 CeedCall(CeedOperatorGetQFunction(op, &qf)); 575 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 576 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 577 578 // Get assembly data 579 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 580 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 581 &eval_modes_out, NULL, NULL)); 582 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 583 basis_in = bases[0]; 584 585 CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 586 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 587 588 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 589 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 590 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 591 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 592 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 593 local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 594 595 CeedCall(CeedElemRestrictionGetType(active_rstr, &rstr_type)); 596 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 597 CeedCall(CeedElemRestrictionGetOrientations(active_rstr, CEED_MEM_HOST, &orients)); 598 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 599 CeedCall(CeedElemRestrictionGetCurlOrientations(active_rstr, CEED_MEM_HOST, &curl_orients)); 600 } 601 602 // Loop over elements and put in data structure 603 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 604 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 605 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 606 607 // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 608 CeedSize count = 0; 609 CeedScalar *vals, BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]], elem_mat[elem_size * elem_size]; 610 const CeedScalar **B_mats_in, **B_mats_out; 611 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 612 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 613 614 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 615 for (CeedSize e = 0; e < num_elem; e++) { 616 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 617 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 618 // Compute B^T*D 619 for (CeedSize n = 0; n < elem_size; n++) { 620 for (CeedSize q = 0; q < num_qpts; q++) { 621 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 622 const CeedSize btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 623 CeedScalar sum = 0.0; 624 625 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 626 const CeedSize b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 627 const CeedSize eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 628 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 629 630 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 631 } 632 BTD_mat[btd_index] = sum; 633 } 634 } 635 } 636 637 // Form element matrix itself (for each block component) 638 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 639 640 // Transform the element matrix if required 641 if (orients) { 642 const bool *elem_orients = &orients[e * elem_size]; 643 644 for (CeedInt i = 0; i < elem_size; i++) { 645 for (CeedInt j = 0; j < elem_size; j++) { 646 elem_mat[i * elem_size + j] *= elem_orients[i] ? -1.0 : 1.0; 647 elem_mat[i * elem_size + j] *= elem_orients[j] ? -1.0 : 1.0; 648 } 649 } 650 } else if (curl_orients) { 651 const CeedInt8 *elem_curl_orients = &curl_orients[e * 3 * elem_size]; 652 CeedScalar o_elem_mat[elem_size * elem_size]; 653 654 // T^T*(B^T*D*B) 655 for (CeedInt i = 0; i < elem_size; i++) { 656 for (CeedInt j = 0; j < elem_size; j++) { 657 o_elem_mat[i * elem_size + j] = elem_mat[i * elem_size + j] * elem_curl_orients[3 * i + 1] + 658 (i > 0 ? elem_mat[(i - 1) * elem_size + j] * elem_curl_orients[3 * i - 1] : 0.0) + 659 (i < elem_size - 1 ? elem_mat[(i + 1) * elem_size + j] * elem_curl_orients[3 * i + 3] : 0.0); 660 } 661 } 662 // T^T*(B^T*D*B)*T 663 for (CeedInt i = 0; i < elem_size; i++) { 664 for (CeedInt j = 0; j < elem_size; j++) { 665 elem_mat[i * elem_size + j] = o_elem_mat[i * elem_size + j] * elem_curl_orients[3 * j + 1] + 666 (j > 0 ? o_elem_mat[i * elem_size + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) + 667 (j < elem_size - 1 ? o_elem_mat[i * elem_size + j + 1] * elem_curl_orients[3 * j + 3] : 0.0); 668 } 669 } 670 } 671 672 // Put element matrix in coordinate data structure 673 for (CeedInt i = 0; i < elem_size; i++) { 674 for (CeedInt j = 0; j < elem_size; j++) { 675 vals[offset + count] = elem_mat[i * elem_size + j]; 676 count++; 677 } 678 } 679 } 680 } 681 } 682 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 683 CeedCall(CeedVectorRestoreArray(values, &vals)); 684 685 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 686 CeedCall(CeedVectorDestroy(&assembled_qf)); 687 688 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 689 CeedCall(CeedElemRestrictionRestoreOrientations(active_rstr, &orients)); 690 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 691 CeedCall(CeedElemRestrictionRestoreCurlOrientations(active_rstr, &curl_orients)); 692 } 693 return CEED_ERROR_SUCCESS; 694 } 695 696 /** 697 @brief Count number of entries for assembled CeedOperator 698 699 @param[in] op CeedOperator to assemble 700 @param[out] num_entries Number of entries in assembled representation 701 702 @return An error code: 0 - success, otherwise - failure 703 704 @ref Utility 705 **/ 706 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 707 bool is_composite; 708 CeedInt num_elem, elem_size, num_comp; 709 CeedElemRestriction rstr; 710 711 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 712 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 713 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 714 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 715 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 716 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 717 *num_entries = (CeedSize)elem_size * num_comp * elem_size * num_comp * num_elem; 718 return CEED_ERROR_SUCCESS; 719 } 720 721 /** 722 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 723 724 @param[in] op_fine Fine grid operator 725 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 726 @param[in] rstr_coarse Coarse grid restriction 727 @param[in] basis_coarse Coarse grid active vector basis 728 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 729 @param[out] op_coarse Coarse grid operator 730 @param[out] op_prolong Coarse to fine operator, or NULL 731 @param[out] op_restrict Fine to coarse operator, or NULL 732 733 @return An error code: 0 - success, otherwise - failure 734 735 @ref Developer 736 **/ 737 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 738 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 739 bool is_composite; 740 Ceed ceed; 741 CeedInt num_comp; 742 CeedVector mult_vec = NULL; 743 CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL; 744 745 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 746 747 // Check for composite operator 748 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 749 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 750 751 // Coarse Grid 752 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 753 // -- Clone input fields 754 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 755 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 756 rstr_fine = op_fine->input_fields[i]->elem_rstr; 757 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 758 } else { 759 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 760 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 761 } 762 } 763 // -- Clone output fields 764 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 765 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 766 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 767 } else { 768 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 769 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 770 } 771 } 772 // -- Clone QFunctionAssemblyData 773 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 774 775 // Multiplicity vector 776 if (op_restrict || op_prolong) { 777 CeedVector mult_e_vec; 778 CeedRestrictionType rstr_type; 779 780 CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type)); 781 CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED, 782 "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported"); 783 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 784 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 785 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 786 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 787 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 788 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 789 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 790 CeedCall(CeedVectorDestroy(&mult_e_vec)); 791 CeedCall(CeedVectorReciprocal(mult_vec)); 792 } 793 794 // Clone name 795 bool has_name = op_fine->name; 796 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 797 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 798 799 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 800 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 801 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 802 803 // Restriction/Prolongation Operators 804 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 805 806 // Restriction 807 if (op_restrict) { 808 CeedInt *num_comp_r_data; 809 CeedQFunctionContext ctx_r; 810 CeedQFunction qf_restrict; 811 812 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 813 CeedCall(CeedCalloc(1, &num_comp_r_data)); 814 num_comp_r_data[0] = num_comp; 815 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 816 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 817 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 818 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 819 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 820 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 821 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 822 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 823 824 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 825 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 826 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 827 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 828 829 // Set name 830 char *restriction_name; 831 832 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 833 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 834 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 835 CeedCall(CeedFree(&restriction_name)); 836 837 // Check 838 CeedCall(CeedOperatorCheckReady(*op_restrict)); 839 840 // Cleanup 841 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 842 } 843 844 // Prolongation 845 if (op_prolong) { 846 CeedInt *num_comp_p_data; 847 CeedQFunctionContext ctx_p; 848 CeedQFunction qf_prolong; 849 850 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 851 CeedCall(CeedCalloc(1, &num_comp_p_data)); 852 num_comp_p_data[0] = num_comp; 853 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 854 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 855 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 856 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 857 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 858 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 859 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 860 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 861 862 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 863 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 864 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 865 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 866 867 // Set name 868 char *prolongation_name; 869 870 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 871 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 872 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 873 CeedCall(CeedFree(&prolongation_name)); 874 875 // Check 876 CeedCall(CeedOperatorCheckReady(*op_prolong)); 877 878 // Cleanup 879 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 880 } 881 882 // Check 883 CeedCall(CeedOperatorCheckReady(*op_coarse)); 884 885 // Cleanup 886 CeedCall(CeedVectorDestroy(&mult_vec)); 887 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 888 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 889 return CEED_ERROR_SUCCESS; 890 } 891 892 /** 893 @brief Build 1D mass matrix and Laplacian with perturbation 894 895 @param[in] interp_1d Interpolation matrix in one dimension 896 @param[in] grad_1d Gradient matrix in one dimension 897 @param[in] q_weight_1d Quadrature weights in one dimension 898 @param[in] P_1d Number of basis nodes in one dimension 899 @param[in] Q_1d Number of quadrature points in one dimension 900 @param[in] dim Dimension of basis 901 @param[out] mass Assembled mass matrix in one dimension 902 @param[out] laplace Assembled perturbed Laplacian in one dimension 903 904 @return An error code: 0 - success, otherwise - failure 905 906 @ref Developer 907 **/ 908 CeedPragmaOptimizeOff 909 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 910 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 911 for (CeedInt i = 0; i < P_1d; i++) { 912 for (CeedInt j = 0; j < P_1d; j++) { 913 CeedScalar sum = 0.0; 914 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 915 mass[i + j * P_1d] = sum; 916 } 917 } 918 // -- Laplacian 919 for (CeedInt i = 0; i < P_1d; i++) { 920 for (CeedInt j = 0; j < P_1d; j++) { 921 CeedScalar sum = 0.0; 922 923 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 924 laplace[i + j * P_1d] = sum; 925 } 926 } 927 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 928 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 929 return CEED_ERROR_SUCCESS; 930 } 931 CeedPragmaOptimizeOn 932 933 /// @} 934 935 /// ---------------------------------------------------------------------------- 936 /// CeedOperator Backend API 937 /// ---------------------------------------------------------------------------- 938 /// @addtogroup CeedOperatorBackend 939 /// @{ 940 941 /** 942 @brief Create object holding CeedQFunction assembly data for CeedOperator 943 944 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 945 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 946 947 @return An error code: 0 - success, otherwise - failure 948 949 @ref Backend 950 **/ 951 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 952 CeedCall(CeedCalloc(1, data)); 953 (*data)->ref_count = 1; 954 (*data)->ceed = ceed; 955 CeedCall(CeedReference(ceed)); 956 return CEED_ERROR_SUCCESS; 957 } 958 959 /** 960 @brief Increment the reference counter for a CeedQFunctionAssemblyData 961 962 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 963 964 @return An error code: 0 - success, otherwise - failure 965 966 @ref Backend 967 **/ 968 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 969 data->ref_count++; 970 return CEED_ERROR_SUCCESS; 971 } 972 973 /** 974 @brief Set re-use of CeedQFunctionAssemblyData 975 976 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 977 @param[in] reuse_data Boolean flag indicating data re-use 978 979 @return An error code: 0 - success, otherwise - failure 980 981 @ref Backend 982 **/ 983 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 984 data->reuse_data = reuse_data; 985 data->needs_data_update = true; 986 return CEED_ERROR_SUCCESS; 987 } 988 989 /** 990 @brief Mark QFunctionAssemblyData as stale 991 992 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 993 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 994 995 @return An error code: 0 - success, otherwise - failure 996 997 @ref Backend 998 **/ 999 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 1000 data->needs_data_update = needs_data_update; 1001 return CEED_ERROR_SUCCESS; 1002 } 1003 1004 /** 1005 @brief Determine if QFunctionAssemblyData needs update 1006 1007 @param[in] data CeedQFunctionAssemblyData to mark as stale 1008 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1009 1010 @return An error code: 0 - success, otherwise - failure 1011 1012 @ref Backend 1013 **/ 1014 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1015 *is_update_needed = !data->reuse_data || data->needs_data_update; 1016 return CEED_ERROR_SUCCESS; 1017 } 1018 1019 /** 1020 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1021 1022 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1023 1024 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1025 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1026 CeedQFunctionAssemblyData. 1027 1028 @param[in] data CeedQFunctionAssemblyData to copy reference to 1029 @param[in,out] data_copy Variable to store copied reference 1030 1031 @return An error code: 0 - success, otherwise - failure 1032 1033 @ref Backend 1034 **/ 1035 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1036 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1037 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1038 *data_copy = data; 1039 return CEED_ERROR_SUCCESS; 1040 } 1041 1042 /** 1043 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1044 1045 @param[in] data CeedQFunctionAssemblyData to retrieve status 1046 @param[out] is_setup Boolean flag for setup status 1047 1048 @return An error code: 0 - success, otherwise - failure 1049 1050 @ref Backend 1051 **/ 1052 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1053 *is_setup = data->is_setup; 1054 return CEED_ERROR_SUCCESS; 1055 } 1056 1057 /** 1058 @brief Set internal objects for CeedQFunctionAssemblyData 1059 1060 @param[in,out] data CeedQFunctionAssemblyData to set objects 1061 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1062 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1063 1064 @return An error code: 0 - success, otherwise - failure 1065 1066 @ref Backend 1067 **/ 1068 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1069 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1070 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1071 1072 data->is_setup = true; 1073 return CEED_ERROR_SUCCESS; 1074 } 1075 1076 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1077 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1078 1079 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1080 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1081 return CEED_ERROR_SUCCESS; 1082 } 1083 1084 /** 1085 @brief Destroy CeedQFunctionAssemblyData 1086 1087 @param[in,out] data CeedQFunctionAssemblyData to destroy 1088 1089 @return An error code: 0 - success, otherwise - failure 1090 1091 @ref Backend 1092 **/ 1093 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1094 if (!*data || --(*data)->ref_count > 0) { 1095 *data = NULL; 1096 return CEED_ERROR_SUCCESS; 1097 } 1098 CeedCall(CeedDestroy(&(*data)->ceed)); 1099 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1100 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1101 1102 CeedCall(CeedFree(data)); 1103 return CEED_ERROR_SUCCESS; 1104 } 1105 1106 /** 1107 @brief Get CeedOperatorAssemblyData 1108 1109 @param[in] op CeedOperator to assemble 1110 @param[out] data CeedQFunctionAssemblyData 1111 1112 @return An error code: 0 - success, otherwise - failure 1113 1114 @ref Backend 1115 **/ 1116 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1117 if (!op->op_assembled) { 1118 CeedOperatorAssemblyData data; 1119 1120 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1121 op->op_assembled = data; 1122 } 1123 *data = op->op_assembled; 1124 return CEED_ERROR_SUCCESS; 1125 } 1126 1127 /** 1128 @brief Create object holding CeedOperator assembly data. 1129 1130 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1131 An array with references to the corresponding active CeedElemRestrictions is also stored. 1132 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1133 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1134 CeedEvalMode. 1135 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1136 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1137 1138 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1139 @param[in] op CeedOperator to be assembled 1140 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1141 1142 @return An error code: 0 - success, otherwise - failure 1143 1144 @ref Backend 1145 **/ 1146 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1147 CeedInt num_active_bases = 0, num_input_fields, *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0, num_output_fields; 1148 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1149 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1150 CeedQFunctionField *qf_fields; 1151 CeedQFunction qf; 1152 CeedOperatorField *op_fields; 1153 1154 // Allocate 1155 CeedCall(CeedCalloc(1, data)); 1156 (*data)->ceed = ceed; 1157 CeedCall(CeedReference(ceed)); 1158 1159 // Build OperatorAssembly data 1160 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1161 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1162 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1163 1164 // Determine active input basis 1165 for (CeedInt i = 0; i < num_input_fields; i++) { 1166 CeedVector vec; 1167 1168 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1169 if (vec == CEED_VECTOR_ACTIVE) { 1170 CeedInt index = -1, num_comp, q_comp; 1171 CeedEvalMode eval_mode; 1172 CeedBasis basis_in = NULL; 1173 1174 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1175 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1176 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1177 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1178 for (CeedInt i = 0; i < num_active_bases; i++) { 1179 if ((*data)->active_bases[i] == basis_in) index = i; 1180 } 1181 if (index == -1) { 1182 CeedElemRestriction elem_rstr_in; 1183 1184 index = num_active_bases; 1185 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1186 (*data)->active_bases[num_active_bases] = NULL; 1187 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1188 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1189 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1190 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1191 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1192 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1193 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1194 num_eval_modes_in[index] = 0; 1195 num_eval_modes_out[index] = 0; 1196 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1197 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1198 eval_modes_in[index] = NULL; 1199 eval_modes_out[index] = NULL; 1200 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1201 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1202 eval_mode_offsets_in[index] = NULL; 1203 eval_mode_offsets_out[index] = NULL; 1204 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1205 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1206 (*data)->assembled_bases_in[index] = NULL; 1207 (*data)->assembled_bases_out[index] = NULL; 1208 num_active_bases++; 1209 } 1210 if (eval_mode != CEED_EVAL_WEIGHT) { 1211 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1212 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1213 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1214 for (CeedInt d = 0; d < q_comp; d++) { 1215 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1216 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1217 offset += num_comp; 1218 } 1219 num_eval_modes_in[index] += q_comp; 1220 } 1221 } 1222 } 1223 (*data)->num_eval_modes_in = num_eval_modes_in; 1224 (*data)->eval_modes_in = eval_modes_in; 1225 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1226 1227 // Determine active output basis 1228 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1229 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1230 offset = 0; 1231 for (CeedInt i = 0; i < num_output_fields; i++) { 1232 CeedVector vec; 1233 1234 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1235 if (vec == CEED_VECTOR_ACTIVE) { 1236 CeedInt index = -1, num_comp, q_comp; 1237 CeedEvalMode eval_mode; 1238 CeedBasis basis_out = NULL; 1239 1240 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1241 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1242 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1243 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1244 for (CeedInt i = 0; i < num_active_bases; i++) { 1245 if ((*data)->active_bases[i] == basis_out) index = i; 1246 } 1247 if (index == -1) { 1248 CeedElemRestriction elem_rstr_out; 1249 1250 index = num_active_bases; 1251 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1252 (*data)->active_bases[num_active_bases] = NULL; 1253 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1254 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1255 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1256 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1257 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1258 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1259 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1260 num_eval_modes_in[index] = 0; 1261 num_eval_modes_out[index] = 0; 1262 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1263 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1264 eval_modes_in[index] = NULL; 1265 eval_modes_out[index] = NULL; 1266 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1267 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1268 eval_mode_offsets_in[index] = NULL; 1269 eval_mode_offsets_out[index] = NULL; 1270 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1271 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1272 (*data)->assembled_bases_in[index] = NULL; 1273 (*data)->assembled_bases_out[index] = NULL; 1274 num_active_bases++; 1275 } 1276 if (eval_mode != CEED_EVAL_WEIGHT) { 1277 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1278 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1279 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1280 for (CeedInt d = 0; d < q_comp; d++) { 1281 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1282 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1283 offset += num_comp; 1284 } 1285 num_eval_modes_out[index] += q_comp; 1286 } 1287 } 1288 } 1289 (*data)->num_output_components = offset; 1290 (*data)->num_eval_modes_out = num_eval_modes_out; 1291 (*data)->eval_modes_out = eval_modes_out; 1292 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1293 (*data)->num_active_bases = num_active_bases; 1294 return CEED_ERROR_SUCCESS; 1295 } 1296 1297 /** 1298 @brief Get CeedOperator CeedEvalModes for assembly. 1299 1300 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1301 1302 @param[in] data CeedOperatorAssemblyData 1303 @param[out] num_active_bases Total number of active bases 1304 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1305 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1306 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1307 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1308 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1309 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1310 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1311 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1312 including contributions of all active bases 1313 1314 @return An error code: 0 - success, otherwise - failure 1315 1316 1317 @ref Backend 1318 **/ 1319 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1320 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1321 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1322 if (num_active_bases) *num_active_bases = data->num_active_bases; 1323 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1324 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1325 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1326 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1327 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1328 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1329 if (num_output_components) *num_output_components = data->num_output_components; 1330 return CEED_ERROR_SUCCESS; 1331 } 1332 1333 /** 1334 @brief Get CeedOperator CeedBasis data for assembly. 1335 1336 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1337 1338 @param[in] data CeedOperatorAssemblyData 1339 @param[out] num_active_bases Number of active bases, or NULL 1340 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1341 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1342 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1343 1344 @return An error code: 0 - success, otherwise - failure 1345 1346 @ref Backend 1347 **/ 1348 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1349 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1350 // Assemble B_in, B_out if needed 1351 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1352 CeedInt num_qpts; 1353 1354 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1355 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1356 bool has_eval_none = false; 1357 CeedInt num_nodes; 1358 CeedScalar *B_in = NULL, *identity = NULL; 1359 1360 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1361 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1362 1363 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1364 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1365 } 1366 if (has_eval_none) { 1367 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1368 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1369 identity[i * num_nodes + i] = 1.0; 1370 } 1371 } 1372 1373 for (CeedInt q = 0; q < num_qpts; q++) { 1374 for (CeedInt n = 0; n < num_nodes; n++) { 1375 CeedInt d_in = 0, q_comp_in; 1376 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1377 1378 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1379 const CeedInt qq = data->num_eval_modes_in[b] * q; 1380 const CeedScalar *B = NULL; 1381 1382 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1383 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1384 if (q_comp_in > 1) { 1385 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1386 else B = &B[(++d_in) * num_qpts * num_nodes]; 1387 } 1388 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1389 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1390 } 1391 } 1392 } 1393 if (identity) CeedCall(CeedFree(&identity)); 1394 data->assembled_bases_in[b] = B_in; 1395 } 1396 } 1397 1398 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1399 CeedInt num_qpts; 1400 1401 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1402 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1403 bool has_eval_none = false; 1404 CeedInt num_nodes; 1405 CeedScalar *B_out = NULL, *identity = NULL; 1406 1407 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1408 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1409 1410 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1411 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1412 } 1413 if (has_eval_none) { 1414 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1415 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1416 identity[i * num_nodes + i] = 1.0; 1417 } 1418 } 1419 1420 for (CeedInt q = 0; q < num_qpts; q++) { 1421 for (CeedInt n = 0; n < num_nodes; n++) { 1422 CeedInt d_out = 0, q_comp_out; 1423 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1424 1425 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1426 const CeedInt qq = data->num_eval_modes_out[b] * q; 1427 const CeedScalar *B = NULL; 1428 1429 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1430 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1431 if (q_comp_out > 1) { 1432 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1433 else B = &B[(++d_out) * num_qpts * num_nodes]; 1434 } 1435 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1436 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1437 } 1438 } 1439 } 1440 if (identity) CeedCall(CeedFree(&identity)); 1441 data->assembled_bases_out[b] = B_out; 1442 } 1443 } 1444 1445 // Pass out assembled data 1446 if (active_bases) *active_bases = data->active_bases; 1447 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1448 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1449 return CEED_ERROR_SUCCESS; 1450 } 1451 1452 /** 1453 @brief Get CeedOperator CeedBasis data for assembly. 1454 1455 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1456 1457 @param[in] data CeedOperatorAssemblyData 1458 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1459 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1460 1461 @return An error code: 0 - success, otherwise - failure 1462 1463 @ref Backend 1464 **/ 1465 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1466 CeedElemRestriction **active_elem_rstrs) { 1467 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1468 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1469 return CEED_ERROR_SUCCESS; 1470 } 1471 1472 /** 1473 @brief Destroy CeedOperatorAssemblyData 1474 1475 @param[in,out] data CeedOperatorAssemblyData to destroy 1476 1477 @return An error code: 0 - success, otherwise - failure 1478 1479 @ref Backend 1480 **/ 1481 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1482 if (!*data) { 1483 *data = NULL; 1484 return CEED_ERROR_SUCCESS; 1485 } 1486 CeedCall(CeedDestroy(&(*data)->ceed)); 1487 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1488 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1489 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1490 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1491 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1492 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1493 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1494 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1495 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1496 } 1497 CeedCall(CeedFree(&(*data)->active_bases)); 1498 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1499 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1500 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1501 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1502 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1503 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1504 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1505 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1506 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1507 1508 CeedCall(CeedFree(data)); 1509 return CEED_ERROR_SUCCESS; 1510 } 1511 1512 /// @} 1513 1514 /// ---------------------------------------------------------------------------- 1515 /// CeedOperator Public API 1516 /// ---------------------------------------------------------------------------- 1517 /// @addtogroup CeedOperatorUser 1518 /// @{ 1519 1520 /** 1521 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1522 1523 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1524 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1525 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1526 1527 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1528 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1529 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1530 1531 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1532 1533 @param[in] op CeedOperator to assemble CeedQFunction 1534 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1535 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1536 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1537 1538 @return An error code: 0 - success, otherwise - failure 1539 1540 @ref User 1541 **/ 1542 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1543 CeedCall(CeedOperatorCheckReady(op)); 1544 1545 if (op->LinearAssembleQFunction) { 1546 // Backend version 1547 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1548 } else { 1549 // Operator fallback 1550 CeedOperator op_fallback; 1551 1552 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1553 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1554 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1555 } 1556 return CEED_ERROR_SUCCESS; 1557 } 1558 1559 /** 1560 @brief Assemble CeedQFunction and store result internally. 1561 1562 Return copied references of stored data to the caller. 1563 Caller is responsible for ownership and destruction of the copied references. 1564 See also @ref CeedOperatorLinearAssembleQFunction 1565 1566 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1567 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1568 1569 @param[in] op CeedOperator to assemble CeedQFunction 1570 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1571 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1572 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1573 1574 @return An error code: 0 - success, otherwise - failure 1575 1576 @ref User 1577 **/ 1578 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1579 int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL; 1580 CeedOperator op_assemble = NULL; 1581 1582 CeedCall(CeedOperatorCheckReady(op)); 1583 1584 // Determine if fallback parent or operator has implementation 1585 if (op->op_fallback_parent && op->op_fallback_parent->LinearAssembleQFunctionUpdate) { 1586 // -- Backend version for op fallback parent is faster, if it exists 1587 LinearAssembleQFunctionUpdate = op->op_fallback_parent->LinearAssembleQFunctionUpdate; 1588 op_assemble = op->op_fallback_parent; 1589 } else if (op->LinearAssembleQFunctionUpdate) { 1590 // -- Backend version for op 1591 LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate; 1592 op_assemble = op; 1593 } 1594 1595 // Assemble QFunction 1596 if (LinearAssembleQFunctionUpdate) { 1597 // Backend or fallback parent version 1598 bool qf_assembled_is_setup; 1599 CeedVector assembled_vec = NULL; 1600 CeedElemRestriction assembled_rstr = NULL; 1601 1602 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1603 if (qf_assembled_is_setup) { 1604 bool update_needed; 1605 1606 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1607 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1608 if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request)); 1609 } else { 1610 CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request)); 1611 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1612 } 1613 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1614 1615 // Copy reference from internally held copy 1616 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1617 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1618 CeedCall(CeedVectorDestroy(&assembled_vec)); 1619 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1620 } else { 1621 // Operator fallback 1622 CeedOperator op_fallback; 1623 1624 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1625 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1626 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1627 } 1628 return CEED_ERROR_SUCCESS; 1629 } 1630 1631 /** 1632 @brief Assemble the diagonal of a square linear CeedOperator 1633 1634 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1635 1636 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1637 1638 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1639 1640 @param[in] op CeedOperator to assemble CeedQFunction 1641 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1642 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1643 1644 @return An error code: 0 - success, otherwise - failure 1645 1646 @ref User 1647 **/ 1648 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1649 bool is_composite; 1650 CeedSize input_size = 0, output_size = 0; 1651 1652 CeedCall(CeedOperatorCheckReady(op)); 1653 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1654 1655 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1656 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1657 1658 // Early exit for empty operator 1659 if (!is_composite) { 1660 CeedInt num_elem = 0; 1661 1662 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1663 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1664 } 1665 1666 if (op->LinearAssembleDiagonal) { 1667 // Backend version 1668 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1669 return CEED_ERROR_SUCCESS; 1670 } else if (op->LinearAssembleAddDiagonal) { 1671 // Backend version with zeroing first 1672 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1673 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1674 return CEED_ERROR_SUCCESS; 1675 } else { 1676 // Operator fallback 1677 CeedOperator op_fallback; 1678 1679 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1680 if (op_fallback) { 1681 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1682 return CEED_ERROR_SUCCESS; 1683 } 1684 } 1685 // Default interface implementation 1686 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1687 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1688 return CEED_ERROR_SUCCESS; 1689 } 1690 1691 /** 1692 @brief Assemble the diagonal of a square linear CeedOperator 1693 1694 This sums into a CeedVector the diagonal of a linear CeedOperator. 1695 1696 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1697 1698 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1699 1700 @param[in] op CeedOperator to assemble CeedQFunction 1701 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1702 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1703 1704 @return An error code: 0 - success, otherwise - failure 1705 1706 @ref User 1707 **/ 1708 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1709 bool is_composite; 1710 CeedSize input_size = 0, output_size = 0; 1711 1712 CeedCall(CeedOperatorCheckReady(op)); 1713 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1714 1715 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1716 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1717 1718 // Early exit for empty operator 1719 if (!is_composite) { 1720 CeedInt num_elem = 0; 1721 1722 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1723 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1724 } 1725 1726 if (op->LinearAssembleAddDiagonal) { 1727 // Backend version 1728 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1729 return CEED_ERROR_SUCCESS; 1730 } else { 1731 // Operator fallback 1732 CeedOperator op_fallback; 1733 1734 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1735 if (op_fallback) { 1736 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1737 return CEED_ERROR_SUCCESS; 1738 } 1739 } 1740 // Default interface implementation 1741 if (is_composite) { 1742 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1743 } else { 1744 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1745 } 1746 return CEED_ERROR_SUCCESS; 1747 } 1748 1749 /** 1750 @brief Assemble the point block diagonal of a square linear CeedOperator 1751 1752 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1753 1754 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1755 1756 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1757 1758 @param[in] op CeedOperator to assemble CeedQFunction 1759 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1760 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1761 component in]. 1762 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1763 1764 @return An error code: 0 - success, otherwise - failure 1765 1766 @ref User 1767 **/ 1768 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1769 bool is_composite; 1770 CeedSize input_size = 0, output_size = 0; 1771 1772 CeedCall(CeedOperatorCheckReady(op)); 1773 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1774 1775 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1776 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1777 1778 // Early exit for empty operator 1779 if (!is_composite) { 1780 CeedInt num_elem = 0; 1781 1782 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1783 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1784 } 1785 1786 if (op->LinearAssemblePointBlockDiagonal) { 1787 // Backend version 1788 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1789 return CEED_ERROR_SUCCESS; 1790 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1791 // Backend version with zeroing first 1792 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1793 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1794 return CEED_ERROR_SUCCESS; 1795 } else { 1796 // Operator fallback 1797 CeedOperator op_fallback; 1798 1799 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1800 if (op_fallback) { 1801 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1802 return CEED_ERROR_SUCCESS; 1803 } 1804 } 1805 // Default interface implementation 1806 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1807 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1808 return CEED_ERROR_SUCCESS; 1809 } 1810 1811 /** 1812 @brief Assemble the point block diagonal of a square linear CeedOperator 1813 1814 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1815 1816 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1817 1818 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1819 1820 @param[in] op CeedOperator to assemble CeedQFunction 1821 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1822 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1823 component in]. 1824 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1825 1826 @return An error code: 0 - success, otherwise - failure 1827 1828 @ref User 1829 **/ 1830 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1831 bool is_composite; 1832 CeedSize input_size = 0, output_size = 0; 1833 1834 CeedCall(CeedOperatorCheckReady(op)); 1835 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1836 1837 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1838 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1839 1840 // Early exit for empty operator 1841 if (!is_composite) { 1842 CeedInt num_elem = 0; 1843 1844 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1845 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1846 } 1847 1848 if (op->LinearAssembleAddPointBlockDiagonal) { 1849 // Backend version 1850 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1851 return CEED_ERROR_SUCCESS; 1852 } else { 1853 // Operator fallback 1854 CeedOperator op_fallback; 1855 1856 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1857 if (op_fallback) { 1858 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1859 return CEED_ERROR_SUCCESS; 1860 } 1861 } 1862 // Default interface implementation 1863 if (is_composite) { 1864 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1865 } else { 1866 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1867 } 1868 return CEED_ERROR_SUCCESS; 1869 } 1870 1871 /** 1872 @brief Fully assemble the nonzero pattern of a linear operator. 1873 1874 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1875 1876 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1877 matrix in entry (i, j). 1878 Note that the (i, j) pairs are not unique and may repeat. 1879 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1880 1881 This will generally be slow unless your operator is low-order. 1882 1883 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1884 1885 @param[in] op CeedOperator to assemble 1886 @param[out] num_entries Number of entries in coordinate nonzero pattern 1887 @param[out] rows Row number for each entry 1888 @param[out] cols Column number for each entry 1889 1890 @ref User 1891 **/ 1892 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1893 bool is_composite; 1894 CeedInt num_suboperators, offset = 0; 1895 CeedSize single_entries; 1896 CeedOperator *sub_operators; 1897 1898 CeedCall(CeedOperatorCheckReady(op)); 1899 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1900 1901 if (op->LinearAssembleSymbolic) { 1902 // Backend version 1903 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1904 return CEED_ERROR_SUCCESS; 1905 } else { 1906 // Operator fallback 1907 CeedOperator op_fallback; 1908 1909 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1910 if (op_fallback) { 1911 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1912 return CEED_ERROR_SUCCESS; 1913 } 1914 } 1915 1916 // Default interface implementation 1917 1918 // count entries and allocate rows, cols arrays 1919 *num_entries = 0; 1920 if (is_composite) { 1921 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1922 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1923 for (CeedInt k = 0; k < num_suboperators; ++k) { 1924 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1925 *num_entries += single_entries; 1926 } 1927 } else { 1928 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1929 *num_entries += single_entries; 1930 } 1931 CeedCall(CeedCalloc(*num_entries, rows)); 1932 CeedCall(CeedCalloc(*num_entries, cols)); 1933 1934 // assemble nonzero locations 1935 if (is_composite) { 1936 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1937 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1938 for (CeedInt k = 0; k < num_suboperators; ++k) { 1939 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1940 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1941 offset += single_entries; 1942 } 1943 } else { 1944 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1945 } 1946 return CEED_ERROR_SUCCESS; 1947 } 1948 1949 /** 1950 @brief Fully assemble the nonzero entries of a linear operator. 1951 1952 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1953 1954 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1955 matrix in entry (i, j). 1956 Note that the (i, j) pairs are not unique and may repeat. 1957 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1958 1959 This will generally be slow unless your operator is low-order. 1960 1961 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1962 1963 @param[in] op CeedOperator to assemble 1964 @param[out] values Values to assemble into matrix 1965 1966 @ref User 1967 **/ 1968 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1969 bool is_composite; 1970 CeedInt num_suboperators, offset = 0; 1971 CeedSize single_entries = 0; 1972 CeedOperator *sub_operators; 1973 1974 CeedCall(CeedOperatorCheckReady(op)); 1975 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1976 1977 // Early exit for empty operator 1978 if (!is_composite) { 1979 CeedInt num_elem = 0; 1980 1981 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1982 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1983 } 1984 1985 if (op->LinearAssemble) { 1986 // Backend version 1987 CeedCall(op->LinearAssemble(op, values)); 1988 return CEED_ERROR_SUCCESS; 1989 } else { 1990 // Operator fallback 1991 CeedOperator op_fallback; 1992 1993 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1994 if (op_fallback) { 1995 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 1996 return CEED_ERROR_SUCCESS; 1997 } 1998 } 1999 2000 // Default interface implementation 2001 CeedCall(CeedVectorSetValue(values, 0.0)); 2002 if (is_composite) { 2003 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2004 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2005 for (CeedInt k = 0; k < num_suboperators; k++) { 2006 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2007 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2008 offset += single_entries; 2009 } 2010 } else { 2011 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2012 } 2013 return CEED_ERROR_SUCCESS; 2014 } 2015 2016 /** 2017 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2018 2019 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2020 2021 @param[in] op Composite CeedOperator 2022 @param[in] num_skip_indices Number of suboperators to skip 2023 @param[in] skip_indices Array of indices of suboperators to skip 2024 @param[out] mult Vector to store multiplicity (of size l_size) 2025 2026 @return An error code: 0 - success, otherwise - failure 2027 2028 @ref User 2029 **/ 2030 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2031 Ceed ceed; 2032 CeedInt num_suboperators; 2033 CeedSize l_vec_len; 2034 CeedScalar *mult_array; 2035 CeedVector ones_l_vec; 2036 CeedElemRestriction elem_rstr, mult_elem_rstr; 2037 CeedOperator *sub_operators; 2038 2039 CeedCall(CeedOperatorCheckReady(op)); 2040 2041 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2042 2043 // Zero mult vector 2044 CeedCall(CeedVectorSetValue(mult, 0.0)); 2045 2046 // Get suboperators 2047 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2048 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2049 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2050 2051 // Work vector 2052 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2053 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2054 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2055 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2056 2057 // Compute multiplicity across suboperators 2058 for (CeedInt i = 0; i < num_suboperators; i++) { 2059 const CeedScalar *sub_mult_array; 2060 CeedVector sub_mult_l_vec, ones_e_vec; 2061 2062 // -- Check for suboperator to skip 2063 for (CeedInt j = 0; j < num_skip_indices; j++) { 2064 if (skip_indices[j] == i) continue; 2065 } 2066 2067 // -- Sub operator multiplicity 2068 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2069 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); 2070 CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2071 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2072 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2073 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2074 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2075 // ---- Flag every node present in the current suboperator 2076 for (CeedInt j = 0; j < l_vec_len; j++) { 2077 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2078 } 2079 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2080 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2081 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2082 CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr)); 2083 } 2084 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2085 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2086 return CEED_ERROR_SUCCESS; 2087 } 2088 2089 /** 2090 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2091 grid interpolation 2092 2093 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2094 2095 @param[in] op_fine Fine grid operator 2096 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2097 @param[in] rstr_coarse Coarse grid restriction 2098 @param[in] basis_coarse Coarse grid active vector basis 2099 @param[out] op_coarse Coarse grid operator 2100 @param[out] op_prolong Coarse to fine operator, or NULL 2101 @param[out] op_restrict Fine to coarse operator, or NULL 2102 2103 @return An error code: 0 - success, otherwise - failure 2104 2105 @ref User 2106 **/ 2107 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2108 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2109 CeedBasis basis_c_to_f = NULL; 2110 2111 CeedCall(CeedOperatorCheckReady(op_fine)); 2112 2113 // Build prolongation matrix, if required 2114 if (op_prolong || op_restrict) { 2115 CeedBasis basis_fine; 2116 2117 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2118 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2119 } 2120 2121 // Core code 2122 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2123 return CEED_ERROR_SUCCESS; 2124 } 2125 2126 /** 2127 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2128 2129 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2130 2131 @param[in] op_fine Fine grid operator 2132 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2133 @param[in] rstr_coarse Coarse grid restriction 2134 @param[in] basis_coarse Coarse grid active vector basis 2135 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2136 @param[out] op_coarse Coarse grid operator 2137 @param[out] op_prolong Coarse to fine operator, or NULL 2138 @param[out] op_restrict Fine to coarse operator, or NULL 2139 2140 @return An error code: 0 - success, otherwise - failure 2141 2142 @ref User 2143 **/ 2144 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2145 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2146 CeedOperator *op_restrict) { 2147 Ceed ceed; 2148 CeedInt Q_f, Q_c; 2149 CeedBasis basis_fine, basis_c_to_f = NULL; 2150 2151 CeedCall(CeedOperatorCheckReady(op_fine)); 2152 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2153 2154 // Check for compatible quadrature spaces 2155 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2156 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2157 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2158 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2159 2160 // Create coarse to fine basis, if required 2161 if (op_prolong || op_restrict) { 2162 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2163 CeedScalar *q_ref, *q_weight, *grad; 2164 2165 // Check if interpolation matrix is provided 2166 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2167 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2168 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2169 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2170 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2171 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2172 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2173 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2174 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2175 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2176 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2177 CeedCall(CeedFree(&q_ref)); 2178 CeedCall(CeedFree(&q_weight)); 2179 CeedCall(CeedFree(&grad)); 2180 } 2181 2182 // Core code 2183 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2184 return CEED_ERROR_SUCCESS; 2185 } 2186 2187 /** 2188 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2189 2190 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2191 2192 @param[in] op_fine Fine grid operator 2193 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2194 @param[in] rstr_coarse Coarse grid restriction 2195 @param[in] basis_coarse Coarse grid active vector basis 2196 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2197 @param[out] op_coarse Coarse grid operator 2198 @param[out] op_prolong Coarse to fine operator, or NULL 2199 @param[out] op_restrict Fine to coarse operator, or NULL 2200 2201 @return An error code: 0 - success, otherwise - failure 2202 2203 @ref User 2204 **/ 2205 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2206 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2207 CeedOperator *op_restrict) { 2208 Ceed ceed; 2209 CeedInt Q_f, Q_c; 2210 CeedBasis basis_fine, basis_c_to_f = NULL; 2211 2212 CeedCall(CeedOperatorCheckReady(op_fine)); 2213 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2214 2215 // Check for compatible quadrature spaces 2216 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2217 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2218 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2219 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2220 2221 // Coarse to fine basis 2222 if (op_prolong || op_restrict) { 2223 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2224 CeedScalar *q_ref, *q_weight, *grad; 2225 CeedElemTopology topo; 2226 2227 // Check if interpolation matrix is provided 2228 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2229 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2230 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2231 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2232 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2233 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2234 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2235 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2236 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2237 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2238 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2239 CeedCall(CeedFree(&q_ref)); 2240 CeedCall(CeedFree(&q_weight)); 2241 CeedCall(CeedFree(&grad)); 2242 } 2243 2244 // Core code 2245 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2246 return CEED_ERROR_SUCCESS; 2247 } 2248 2249 /** 2250 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2251 2252 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2253 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2254 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2255 \hat S V\f$. 2256 The CeedOperator must be linear and non-composite. 2257 The associated CeedQFunction must therefore also be linear. 2258 2259 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2260 2261 @param[in] op CeedOperator to create element inverses 2262 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2263 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2264 2265 @return An error code: 0 - success, otherwise - failure 2266 2267 @ref User 2268 **/ 2269 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2270 Ceed ceed, ceed_parent; 2271 bool interp = false, grad = false, is_tensor_basis = true; 2272 CeedInt num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2273 CeedSize l_size = 1; 2274 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg; 2275 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2276 CeedVector q_data; 2277 CeedElemRestriction rstr = NULL, rstr_qd_i; 2278 CeedBasis basis = NULL, fdm_basis; 2279 CeedQFunctionContext ctx_fdm; 2280 CeedQFunctionField *qf_fields; 2281 CeedQFunction qf, qf_fdm; 2282 CeedOperatorField *op_fields; 2283 2284 CeedCall(CeedOperatorCheckReady(op)); 2285 2286 if (op->CreateFDMElementInverse) { 2287 // Backend version 2288 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2289 return CEED_ERROR_SUCCESS; 2290 } else { 2291 // Operator fallback 2292 CeedOperator op_fallback; 2293 2294 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2295 if (op_fallback) { 2296 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2297 return CEED_ERROR_SUCCESS; 2298 } 2299 } 2300 2301 // Default interface implementation 2302 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2303 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2304 ceed_parent = ceed_parent ? ceed_parent : ceed; 2305 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2306 2307 // Determine active input basis 2308 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2309 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2310 for (CeedInt i = 0; i < num_input_fields; i++) { 2311 CeedVector vec; 2312 2313 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2314 if (vec == CEED_VECTOR_ACTIVE) { 2315 CeedEvalMode eval_mode; 2316 2317 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2318 interp = interp || eval_mode == CEED_EVAL_INTERP; 2319 grad = grad || eval_mode == CEED_EVAL_GRAD; 2320 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2321 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2322 } 2323 } 2324 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2325 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2326 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2327 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2328 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2329 CeedCall(CeedBasisGetDimension(basis, &dim)); 2330 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2331 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2332 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2333 2334 // Build and diagonalize 1D Mass and Laplacian 2335 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2336 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2337 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2338 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2339 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2340 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2341 CeedCall(CeedCalloc(P_1d, &lambda)); 2342 // -- Build matrices 2343 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2344 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2345 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2346 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2347 2348 // -- Diagonalize 2349 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2350 CeedCall(CeedFree(&mass)); 2351 CeedCall(CeedFree(&laplace)); 2352 for (CeedInt i = 0; i < P_1d; i++) { 2353 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2354 } 2355 CeedCall(CeedFree(&x)); 2356 2357 { 2358 CeedInt layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2359 CeedScalar max_norm = 0; 2360 const CeedScalar *assembled_array, *q_weight_array; 2361 CeedVector assembled = NULL, q_weight; 2362 CeedElemRestriction rstr_qf = NULL; 2363 2364 // Assemble QFunction 2365 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2366 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2367 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2368 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2369 2370 // Calculate element averages 2371 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2372 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2373 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2374 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2375 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2376 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2377 2378 for (CeedInt e = 0; e < num_elem; e++) { 2379 CeedInt count = 0; 2380 2381 for (CeedInt q = 0; q < num_qpts; q++) { 2382 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2383 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2384 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2385 count++; 2386 } 2387 } 2388 } 2389 if (count) { 2390 elem_avg[e] /= count; 2391 } else { 2392 elem_avg[e] = 1.0; 2393 } 2394 } 2395 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2396 CeedCall(CeedVectorDestroy(&assembled)); 2397 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2398 CeedCall(CeedVectorDestroy(&q_weight)); 2399 } 2400 2401 // Build FDM diagonal 2402 { 2403 CeedScalar *q_data_array, *fdm_diagonal; 2404 2405 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2406 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2407 for (CeedInt c = 0; c < num_comp; c++) { 2408 for (CeedInt n = 0; n < num_nodes; n++) { 2409 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2410 if (grad) { 2411 for (CeedInt d = 0; d < dim; d++) { 2412 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2413 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2414 } 2415 } 2416 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2417 } 2418 } 2419 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2420 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2421 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2422 for (CeedInt e = 0; e < num_elem; e++) { 2423 for (CeedInt c = 0; c < num_comp; c++) { 2424 for (CeedInt n = 0; n < num_nodes; n++) 2425 q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2426 } 2427 } 2428 CeedCall(CeedFree(&elem_avg)); 2429 CeedCall(CeedFree(&fdm_diagonal)); 2430 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2431 } 2432 2433 // Setup FDM operator 2434 // -- Basis 2435 { 2436 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2437 2438 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2439 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2440 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2441 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2442 CeedCall(CeedFree(&fdm_interp)); 2443 CeedCall(CeedFree(&grad_dummy)); 2444 CeedCall(CeedFree(&q_ref_dummy)); 2445 CeedCall(CeedFree(&q_weight_dummy)); 2446 CeedCall(CeedFree(&lambda)); 2447 } 2448 2449 // -- Restriction 2450 { 2451 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2452 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2453 } 2454 2455 // -- QFunction 2456 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2457 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2458 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2459 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2460 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2461 2462 // -- QFunction context 2463 { 2464 CeedInt *num_comp_data; 2465 2466 CeedCall(CeedCalloc(1, &num_comp_data)); 2467 num_comp_data[0] = num_comp; 2468 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2469 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2470 } 2471 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2472 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2473 2474 // -- Operator 2475 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2476 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2477 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2478 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2479 2480 // Cleanup 2481 CeedCall(CeedVectorDestroy(&q_data)); 2482 CeedCall(CeedBasisDestroy(&fdm_basis)); 2483 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2484 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2485 return CEED_ERROR_SUCCESS; 2486 } 2487 2488 /// @} 2489