1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] basis CeedBasis from which to get the basis matrix 184 @param[in] eval_mode Current basis evaluation mode 185 @param[in] identity Pointer to identity matrix 186 @param[out] basis_ptr Basis pointer to set 187 188 @ref Developer 189 **/ 190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 191 switch (eval_mode) { 192 case CEED_EVAL_NONE: 193 *basis_ptr = identity; 194 break; 195 case CEED_EVAL_INTERP: 196 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 197 break; 198 case CEED_EVAL_GRAD: 199 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 200 break; 201 case CEED_EVAL_DIV: 202 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 203 break; 204 case CEED_EVAL_CURL: 205 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 206 break; 207 case CEED_EVAL_WEIGHT: 208 break; // Caught by QF Assembly 209 } 210 assert(*basis_ptr != NULL); 211 212 return CEED_ERROR_SUCCESS; 213 } 214 215 /** 216 @brief Create point block restriction for active operator field 217 218 @param[in] rstr Original CeedElemRestriction for active field 219 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 220 221 @return An error code: 0 - success, otherwise - failure 222 223 @ref Developer 224 **/ 225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 226 Ceed ceed; 227 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 228 const CeedInt *offsets; 229 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 230 231 // Expand offsets 232 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 233 CeedSize l_size; 234 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 235 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 236 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 237 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 238 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 239 CeedInt shift = num_comp; 240 if (comp_stride != 1) shift *= num_comp; 241 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 242 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 243 pointblock_offsets[i] = offsets[i] * shift; 244 } 245 246 // Create new restriction 247 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 248 pointblock_offsets, pointblock_rstr)); 249 250 // Cleanup 251 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 252 253 return CEED_ERROR_SUCCESS; 254 } 255 256 /** 257 @brief Core logic for assembling operator diagonal or point block diagonal 258 259 @param[in] op CeedOperator to assemble point block diagonal 260 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 261 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 262 @param[out] assembled CeedVector to store assembled diagonal 263 264 @return An error code: 0 - success, otherwise - failure 265 266 @ref Developer 267 **/ 268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 269 Ceed ceed; 270 CeedCall(CeedOperatorGetCeed(op, &ceed)); 271 272 // Assemble QFunction 273 CeedQFunction qf; 274 const CeedScalar *assembled_qf_array; 275 CeedVector assembled_qf; 276 CeedElemRestriction assembled_elem_rstr; 277 CeedInt num_input_fields, num_output_fields; 278 CeedInt layout[3]; 279 280 CeedCall(CeedOperatorGetQFunction(op, &qf)); 281 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 282 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 283 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 284 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 285 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 286 287 // Get assembly data 288 CeedOperatorAssemblyData data; 289 const CeedEvalMode **eval_modes_in, **eval_modes_out; 290 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 291 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 292 CeedBasis *active_bases; 293 CeedElemRestriction *active_elem_rstrs; 294 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 295 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 296 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 297 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 298 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 299 300 // Loop over all active bases 301 for (CeedInt b = 0; b < num_active_bases; b++) { 302 // Assemble point block diagonal restriction, if needed 303 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 304 305 if (is_pointblock) { 306 CeedElemRestriction point_block_elem_rstr; 307 308 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 309 diag_elem_rstr = point_block_elem_rstr; 310 } 311 312 // Create diagonal vector 313 CeedVector elem_diag; 314 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 315 316 // Assemble element operator diagonals 317 CeedScalar *elem_diag_array; 318 CeedInt num_elem, num_nodes, num_qpts, num_components; 319 320 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 321 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 322 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 323 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 324 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 325 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 326 327 // Construct identity matrix for basis if required 328 bool has_eval_none = false; 329 CeedScalar *identity = NULL; 330 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 331 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 332 } 333 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 334 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 335 } 336 if (has_eval_none) { 337 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 338 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 339 } 340 341 // Compute the diagonal of B^T D B 342 // Each element 343 for (CeedInt e = 0; e < num_elem; e++) { 344 // Each basis eval mode pair 345 CeedInt d_out = 0, q_comp_out; 346 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 347 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 348 const CeedScalar *B_t = NULL; 349 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 350 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 351 if (q_comp_out > 1) { 352 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 353 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 354 } 355 eval_mode_out_prev = eval_modes_out[b][e_out]; 356 357 CeedInt d_in = 0, q_comp_in; 358 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 359 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 360 const CeedScalar *B = NULL; 361 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 362 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 363 if (q_comp_in > 1) { 364 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 365 else B = &B[(++d_in) * num_qpts * num_nodes]; 366 } 367 eval_mode_in_prev = eval_modes_in[b][e_in]; 368 369 // Each component 370 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 371 // Each qpt/node pair 372 for (CeedInt q = 0; q < num_qpts; q++) { 373 if (is_pointblock) { 374 // Point Block Diagonal 375 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 376 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 377 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 378 for (CeedInt n = 0; n < num_nodes; n++) { 379 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 380 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 381 } 382 } 383 } else { 384 // Diagonal Only 385 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 386 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 387 for (CeedInt n = 0; n < num_nodes; n++) { 388 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 389 } 390 } 391 } 392 } 393 } 394 } 395 } 396 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 397 398 // Assemble local operator diagonal 399 CeedCall(CeedElemRestrictionApplyUnsigned(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 400 401 // Cleanup 402 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 403 CeedCall(CeedVectorDestroy(&elem_diag)); 404 CeedCall(CeedFree(&identity)); 405 } 406 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 407 CeedCall(CeedVectorDestroy(&assembled_qf)); 408 409 return CEED_ERROR_SUCCESS; 410 } 411 412 /** 413 @brief Core logic for assembling composite operator diagonal 414 415 @param[in] op CeedOperator to assemble point block diagonal 416 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 417 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 418 @param[out] assembled CeedVector to store assembled diagonal 419 420 @return An error code: 0 - success, otherwise - failure 421 422 @ref Developer 423 **/ 424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 CeedInt num_sub; 427 CeedOperator *suboperators; 428 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 429 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 430 for (CeedInt i = 0; i < num_sub; i++) { 431 if (is_pointblock) { 432 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 433 } else { 434 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 435 } 436 } 437 return CEED_ERROR_SUCCESS; 438 } 439 440 /** 441 @brief Build nonzero pattern for non-composite operator 442 443 Users should generally use CeedOperatorLinearAssembleSymbolic() 444 445 Note: For operators using oriented element restrictions, entries in rows or cols may be negative indicating the assembled value at this nonzero 446 should be negated 447 448 @param[in] op CeedOperator to assemble nonzero pattern 449 @param[in] offset Offset for number of entries 450 @param[out] rows Row number for each entry 451 @param[out] cols Column number for each entry 452 453 @return An error code: 0 - success, otherwise - failure 454 455 @ref Developer 456 **/ 457 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 458 Ceed ceed; 459 bool is_composite; 460 CeedCall(CeedOperatorGetCeed(op, &ceed)); 461 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 462 463 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 464 465 CeedSize num_nodes; 466 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 467 CeedElemRestriction rstr_in; 468 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 469 CeedInt num_elem, elem_size, num_comp; 470 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 471 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 472 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 473 CeedInt layout_er[3]; 474 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 475 476 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 477 478 // Determine elem_dof relation 479 CeedVector index_vec; 480 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 481 CeedScalar *array; 482 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 483 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 484 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 485 CeedVector elem_dof; 486 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 487 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 488 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 489 const CeedScalar *elem_dof_a; 490 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 491 CeedCall(CeedVectorDestroy(&index_vec)); 492 493 // Determine i, j locations for element matrices 494 CeedInt count = 0; 495 for (CeedInt e = 0; e < num_elem; e++) { 496 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 497 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 498 for (CeedInt i = 0; i < elem_size; i++) { 499 for (CeedInt j = 0; j < elem_size; j++) { 500 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 501 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 502 503 const CeedInt row = elem_dof_a[elem_dof_index_row]; 504 const CeedInt col = elem_dof_a[elem_dof_index_col]; 505 506 rows[offset + count] = row; 507 cols[offset + count] = col; 508 count++; 509 } 510 } 511 } 512 } 513 } 514 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 515 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 516 CeedCall(CeedVectorDestroy(&elem_dof)); 517 518 return CEED_ERROR_SUCCESS; 519 } 520 521 /** 522 @brief Assemble nonzero entries for non-composite operator 523 524 Users should generally use CeedOperatorLinearAssemble() 525 526 @param[in] op CeedOperator to assemble 527 @param[in] offset Offset for number of entries 528 @param[out] values Values to assemble into matrix 529 530 @return An error code: 0 - success, otherwise - failure 531 532 @ref Developer 533 **/ 534 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 535 Ceed ceed; 536 bool is_composite; 537 CeedCall(CeedOperatorGetCeed(op, &ceed)); 538 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 539 540 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 541 542 // Early exit for empty operator 543 { 544 CeedInt num_elem = 0; 545 546 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 547 if (num_elem == 0) return CEED_ERROR_SUCCESS; 548 } 549 550 if (op->LinearAssembleSingle) { 551 // Backend version 552 CeedCall(op->LinearAssembleSingle(op, offset, values)); 553 return CEED_ERROR_SUCCESS; 554 } else { 555 // Operator fallback 556 CeedOperator op_fallback; 557 558 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 559 if (op_fallback) { 560 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 561 return CEED_ERROR_SUCCESS; 562 } 563 } 564 565 // Assemble QFunction 566 CeedQFunction qf; 567 CeedCall(CeedOperatorGetQFunction(op, &qf)); 568 CeedVector assembled_qf; 569 CeedElemRestriction rstr_q; 570 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 571 CeedSize qf_length; 572 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 573 574 CeedInt num_input_fields, num_output_fields; 575 CeedOperatorField *input_fields; 576 CeedOperatorField *output_fields; 577 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 578 579 // Get assembly data 580 CeedOperatorAssemblyData data; 581 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 582 const CeedEvalMode **eval_modes_in, **eval_modes_out; 583 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 584 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 585 &eval_modes_out, NULL, NULL)); 586 CeedBasis *bases; 587 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 588 CeedBasis basis_in = bases[0]; 589 590 CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 591 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 592 593 CeedElemRestriction active_rstr; 594 CeedInt num_elem, elem_size, num_qpts, num_comp; 595 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 596 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 597 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 598 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 599 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 600 601 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 602 603 // loop over elements and put in data structure 604 const CeedScalar *assembled_qf_array; 605 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 606 607 CeedInt layout_qf[3]; 608 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 609 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 610 611 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 612 const CeedScalar **B_mats_in, **B_mats_out; 613 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 614 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 615 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 616 CeedScalar elem_mat[elem_size * elem_size]; 617 CeedInt count = 0; 618 CeedScalar *vals; 619 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 620 for (CeedInt e = 0; e < num_elem; e++) { 621 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 622 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 623 // Compute B^T*D 624 for (CeedInt n = 0; n < elem_size; n++) { 625 for (CeedInt q = 0; q < num_qpts; q++) { 626 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 627 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 628 CeedScalar sum = 0.0; 629 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 630 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 631 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 632 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 633 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 634 } 635 BTD_mat[btd_index] = sum; 636 } 637 } 638 } 639 // form element matrix itself (for each block component) 640 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 641 642 // put element matrix in coordinate data structure 643 for (CeedInt i = 0; i < elem_size; i++) { 644 for (CeedInt j = 0; j < elem_size; j++) { 645 vals[offset + count] = elem_mat[i * elem_size + j]; 646 count++; 647 } 648 } 649 } 650 } 651 } 652 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 653 CeedCall(CeedVectorRestoreArray(values, &vals)); 654 655 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 656 CeedCall(CeedVectorDestroy(&assembled_qf)); 657 658 return CEED_ERROR_SUCCESS; 659 } 660 661 /** 662 @brief Count number of entries for assembled CeedOperator 663 664 @param[in] op CeedOperator to assemble 665 @param[out] num_entries Number of entries in assembled representation 666 667 @return An error code: 0 - success, otherwise - failure 668 669 @ref Utility 670 **/ 671 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 672 bool is_composite; 673 CeedElemRestriction rstr; 674 CeedInt num_elem, elem_size, num_comp; 675 676 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 677 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 678 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 679 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 680 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 681 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 682 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 683 684 return CEED_ERROR_SUCCESS; 685 } 686 687 /** 688 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 689 690 @param[in] op_fine Fine grid operator 691 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 692 @param[in] rstr_coarse Coarse grid restriction 693 @param[in] basis_coarse Coarse grid active vector basis 694 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 695 @param[out] op_coarse Coarse grid operator 696 @param[out] op_prolong Coarse to fine operator, or NULL 697 @param[out] op_restrict Fine to coarse operator, or NULL 698 699 @return An error code: 0 - success, otherwise - failure 700 701 @ref Developer 702 **/ 703 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 704 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 705 Ceed ceed; 706 CeedVector mult_vec = NULL; 707 CeedElemRestriction rstr_p_mult_fine = NULL; 708 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 709 710 // Check for composite operator 711 bool is_composite; 712 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 713 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 714 715 // Coarse Grid 716 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 717 CeedElemRestriction rstr_fine = NULL; 718 // -- Clone input fields 719 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 720 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 721 rstr_fine = op_fine->input_fields[i]->elem_rstr; 722 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 723 } else { 724 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 725 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 726 } 727 } 728 // -- Clone output fields 729 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 730 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 731 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 732 } else { 733 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 734 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 735 } 736 } 737 // -- Clone QFunctionAssemblyData 738 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 739 740 // Multiplicity vector 741 if (op_restrict || op_prolong) { 742 CeedVector mult_e_vec; 743 744 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 745 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 746 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 747 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 748 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 749 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 750 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 751 CeedCall(CeedVectorDestroy(&mult_e_vec)); 752 CeedCall(CeedVectorReciprocal(mult_vec)); 753 } 754 755 // Clone name 756 bool has_name = op_fine->name; 757 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 758 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 759 760 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 761 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 762 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 763 764 // Restriction/Prolongation Operators 765 CeedInt num_comp; 766 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 767 768 // Restriction 769 if (op_restrict) { 770 CeedInt *num_comp_r_data; 771 CeedQFunction qf_restrict; 772 CeedQFunctionContext ctx_r; 773 774 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 775 CeedCall(CeedCalloc(1, &num_comp_r_data)); 776 num_comp_r_data[0] = num_comp; 777 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 778 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 779 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 780 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 781 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 782 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 783 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 784 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 785 786 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 787 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 788 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 789 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 790 791 // Set name 792 char *restriction_name; 793 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 794 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 795 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 796 CeedCall(CeedFree(&restriction_name)); 797 798 // Check 799 CeedCall(CeedOperatorCheckReady(*op_restrict)); 800 801 // Cleanup 802 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 803 } 804 805 // Prolongation 806 if (op_prolong) { 807 CeedInt *num_comp_p_data; 808 CeedQFunction qf_prolong; 809 CeedQFunctionContext ctx_p; 810 811 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 812 CeedCall(CeedCalloc(1, &num_comp_p_data)); 813 num_comp_p_data[0] = num_comp; 814 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 815 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 816 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 817 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 818 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 819 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 820 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 821 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 822 823 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 824 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 825 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 826 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 827 828 // Set name 829 char *prolongation_name; 830 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 831 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 832 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 833 CeedCall(CeedFree(&prolongation_name)); 834 835 // Check 836 CeedCall(CeedOperatorCheckReady(*op_prolong)); 837 838 // Cleanup 839 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 840 } 841 842 // Check 843 CeedCall(CeedOperatorCheckReady(*op_coarse)); 844 845 // Cleanup 846 CeedCall(CeedVectorDestroy(&mult_vec)); 847 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 848 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 849 850 return CEED_ERROR_SUCCESS; 851 } 852 853 /** 854 @brief Build 1D mass matrix and Laplacian with perturbation 855 856 @param[in] interp_1d Interpolation matrix in one dimension 857 @param[in] grad_1d Gradient matrix in one dimension 858 @param[in] q_weight_1d Quadrature weights in one dimension 859 @param[in] P_1d Number of basis nodes in one dimension 860 @param[in] Q_1d Number of quadrature points in one dimension 861 @param[in] dim Dimension of basis 862 @param[out] mass Assembled mass matrix in one dimension 863 @param[out] laplace Assembled perturbed Laplacian in one dimension 864 865 @return An error code: 0 - success, otherwise - failure 866 867 @ref Developer 868 **/ 869 CeedPragmaOptimizeOff 870 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 871 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 872 for (CeedInt i = 0; i < P_1d; i++) { 873 for (CeedInt j = 0; j < P_1d; j++) { 874 CeedScalar sum = 0.0; 875 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 876 mass[i + j * P_1d] = sum; 877 } 878 } 879 // -- Laplacian 880 for (CeedInt i = 0; i < P_1d; i++) { 881 for (CeedInt j = 0; j < P_1d; j++) { 882 CeedScalar sum = 0.0; 883 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 884 laplace[i + j * P_1d] = sum; 885 } 886 } 887 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 888 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 889 return CEED_ERROR_SUCCESS; 890 } 891 CeedPragmaOptimizeOn 892 893 /// @} 894 895 /// ---------------------------------------------------------------------------- 896 /// CeedOperator Backend API 897 /// ---------------------------------------------------------------------------- 898 /// @addtogroup CeedOperatorBackend 899 /// @{ 900 901 /** 902 @brief Create object holding CeedQFunction assembly data for CeedOperator 903 904 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 905 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 906 907 @return An error code: 0 - success, otherwise - failure 908 909 @ref Backend 910 **/ 911 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 912 CeedCall(CeedCalloc(1, data)); 913 (*data)->ref_count = 1; 914 (*data)->ceed = ceed; 915 CeedCall(CeedReference(ceed)); 916 917 return CEED_ERROR_SUCCESS; 918 } 919 920 /** 921 @brief Increment the reference counter for a CeedQFunctionAssemblyData 922 923 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 924 925 @return An error code: 0 - success, otherwise - failure 926 927 @ref Backend 928 **/ 929 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 930 data->ref_count++; 931 return CEED_ERROR_SUCCESS; 932 } 933 934 /** 935 @brief Set re-use of CeedQFunctionAssemblyData 936 937 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 938 @param[in] reuse_data Boolean flag indicating data re-use 939 940 @return An error code: 0 - success, otherwise - failure 941 942 @ref Backend 943 **/ 944 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 945 data->reuse_data = reuse_data; 946 data->needs_data_update = true; 947 return CEED_ERROR_SUCCESS; 948 } 949 950 /** 951 @brief Mark QFunctionAssemblyData as stale 952 953 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 954 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 955 956 @return An error code: 0 - success, otherwise - failure 957 958 @ref Backend 959 **/ 960 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 961 data->needs_data_update = needs_data_update; 962 return CEED_ERROR_SUCCESS; 963 } 964 965 /** 966 @brief Determine if QFunctionAssemblyData needs update 967 968 @param[in] data CeedQFunctionAssemblyData to mark as stale 969 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 970 971 @return An error code: 0 - success, otherwise - failure 972 973 @ref Backend 974 **/ 975 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 976 *is_update_needed = !data->reuse_data || data->needs_data_update; 977 return CEED_ERROR_SUCCESS; 978 } 979 980 /** 981 @brief Copy the pointer to a CeedQFunctionAssemblyData. 982 983 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 984 985 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 986 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 987 CeedQFunctionAssemblyData. 988 989 @param[in] data CeedQFunctionAssemblyData to copy reference to 990 @param[in,out] data_copy Variable to store copied reference 991 992 @return An error code: 0 - success, otherwise - failure 993 994 @ref Backend 995 **/ 996 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 997 CeedCall(CeedQFunctionAssemblyDataReference(data)); 998 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 999 *data_copy = data; 1000 return CEED_ERROR_SUCCESS; 1001 } 1002 1003 /** 1004 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1005 1006 @param[in] data CeedQFunctionAssemblyData to retrieve status 1007 @param[out] is_setup Boolean flag for setup status 1008 1009 @return An error code: 0 - success, otherwise - failure 1010 1011 @ref Backend 1012 **/ 1013 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1014 *is_setup = data->is_setup; 1015 return CEED_ERROR_SUCCESS; 1016 } 1017 1018 /** 1019 @brief Set internal objects for CeedQFunctionAssemblyData 1020 1021 @param[in,out] data CeedQFunctionAssemblyData to set objects 1022 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1023 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1024 1025 @return An error code: 0 - success, otherwise - failure 1026 1027 @ref Backend 1028 **/ 1029 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1030 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1031 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1032 1033 data->is_setup = true; 1034 return CEED_ERROR_SUCCESS; 1035 } 1036 1037 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1038 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1039 1040 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1041 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1042 1043 return CEED_ERROR_SUCCESS; 1044 } 1045 1046 /** 1047 @brief Destroy CeedQFunctionAssemblyData 1048 1049 @param[in,out] data CeedQFunctionAssemblyData to destroy 1050 1051 @return An error code: 0 - success, otherwise - failure 1052 1053 @ref Backend 1054 **/ 1055 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1056 if (!*data || --(*data)->ref_count > 0) { 1057 *data = NULL; 1058 return CEED_ERROR_SUCCESS; 1059 } 1060 CeedCall(CeedDestroy(&(*data)->ceed)); 1061 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1062 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1063 1064 CeedCall(CeedFree(data)); 1065 return CEED_ERROR_SUCCESS; 1066 } 1067 1068 /** 1069 @brief Get CeedOperatorAssemblyData 1070 1071 @param[in] op CeedOperator to assemble 1072 @param[out] data CeedQFunctionAssemblyData 1073 1074 @return An error code: 0 - success, otherwise - failure 1075 1076 @ref Backend 1077 **/ 1078 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1079 if (!op->op_assembled) { 1080 CeedOperatorAssemblyData data; 1081 1082 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1083 op->op_assembled = data; 1084 } 1085 *data = op->op_assembled; 1086 1087 return CEED_ERROR_SUCCESS; 1088 } 1089 1090 /** 1091 @brief Create object holding CeedOperator assembly data. 1092 1093 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1094 An array with references to the corresponding active CeedElemRestrictions is also stored. 1095 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1096 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1097 CeedEvalMode. 1098 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1099 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1100 1101 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1102 @param[in] op CeedOperator to be assembled 1103 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1104 1105 @return An error code: 0 - success, otherwise - failure 1106 1107 @ref Backend 1108 **/ 1109 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1110 CeedInt num_active_bases = 0; 1111 1112 // Allocate 1113 CeedCall(CeedCalloc(1, data)); 1114 (*data)->ceed = ceed; 1115 CeedCall(CeedReference(ceed)); 1116 1117 // Build OperatorAssembly data 1118 CeedQFunction qf; 1119 CeedQFunctionField *qf_fields; 1120 CeedOperatorField *op_fields; 1121 CeedInt num_input_fields; 1122 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1123 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1124 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1125 1126 // Determine active input basis 1127 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1128 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1129 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1130 for (CeedInt i = 0; i < num_input_fields; i++) { 1131 CeedVector vec; 1132 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1133 if (vec == CEED_VECTOR_ACTIVE) { 1134 CeedBasis basis_in = NULL; 1135 CeedEvalMode eval_mode; 1136 CeedInt index = -1, dim, num_comp, q_comp; 1137 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1138 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1139 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1140 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1141 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1142 for (CeedInt i = 0; i < num_active_bases; i++) { 1143 if ((*data)->active_bases[i] == basis_in) index = i; 1144 } 1145 if (index == -1) { 1146 CeedElemRestriction elem_rstr_in; 1147 index = num_active_bases; 1148 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1149 (*data)->active_bases[num_active_bases] = NULL; 1150 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1151 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1152 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1153 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1154 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1155 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1156 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1157 num_eval_modes_in[index] = 0; 1158 num_eval_modes_out[index] = 0; 1159 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1160 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1161 eval_modes_in[index] = NULL; 1162 eval_modes_out[index] = NULL; 1163 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1164 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1165 eval_mode_offsets_in[index] = NULL; 1166 eval_mode_offsets_out[index] = NULL; 1167 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1168 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1169 (*data)->assembled_bases_in[index] = NULL; 1170 (*data)->assembled_bases_out[index] = NULL; 1171 num_active_bases++; 1172 } 1173 if (eval_mode != CEED_EVAL_WEIGHT) { 1174 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1175 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1176 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1177 for (CeedInt d = 0; d < q_comp; d++) { 1178 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1179 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1180 offset += num_comp; 1181 } 1182 num_eval_modes_in[index] += q_comp; 1183 } 1184 } 1185 } 1186 (*data)->num_eval_modes_in = num_eval_modes_in; 1187 (*data)->eval_modes_in = eval_modes_in; 1188 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1189 1190 // Determine active output basis 1191 CeedInt num_output_fields; 1192 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1193 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1194 offset = 0; 1195 for (CeedInt i = 0; i < num_output_fields; i++) { 1196 CeedVector vec; 1197 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1198 if (vec == CEED_VECTOR_ACTIVE) { 1199 CeedBasis basis_out = NULL; 1200 CeedEvalMode eval_mode; 1201 CeedInt index = -1, dim, num_comp, q_comp; 1202 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1203 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1204 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1205 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1206 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1207 for (CeedInt i = 0; i < num_active_bases; i++) { 1208 if ((*data)->active_bases[i] == basis_out) index = i; 1209 } 1210 if (index == -1) { 1211 CeedElemRestriction elem_rstr_out; 1212 1213 index = num_active_bases; 1214 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1215 (*data)->active_bases[num_active_bases] = NULL; 1216 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1217 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1218 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1219 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1220 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1221 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1222 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1223 num_eval_modes_in[index] = 0; 1224 num_eval_modes_out[index] = 0; 1225 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1226 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1227 eval_modes_in[index] = NULL; 1228 eval_modes_out[index] = NULL; 1229 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1230 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1231 eval_mode_offsets_in[index] = NULL; 1232 eval_mode_offsets_out[index] = NULL; 1233 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1234 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1235 (*data)->assembled_bases_in[index] = NULL; 1236 (*data)->assembled_bases_out[index] = NULL; 1237 num_active_bases++; 1238 } 1239 if (eval_mode != CEED_EVAL_WEIGHT) { 1240 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1241 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1242 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1243 for (CeedInt d = 0; d < q_comp; d++) { 1244 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1245 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1246 offset += num_comp; 1247 } 1248 num_eval_modes_out[index] += q_comp; 1249 } 1250 } 1251 } 1252 (*data)->num_output_components = offset; 1253 (*data)->num_eval_modes_out = num_eval_modes_out; 1254 (*data)->eval_modes_out = eval_modes_out; 1255 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1256 (*data)->num_active_bases = num_active_bases; 1257 1258 return CEED_ERROR_SUCCESS; 1259 } 1260 1261 /** 1262 @brief Get CeedOperator CeedEvalModes for assembly. 1263 1264 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1265 1266 @param[in] data CeedOperatorAssemblyData 1267 @param[out] num_active_bases Total number of active bases 1268 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1269 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1270 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1271 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1272 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1273 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1274 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1275 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1276 including contributions of all active bases 1277 1278 @return An error code: 0 - success, otherwise - failure 1279 1280 1281 @ref Backend 1282 **/ 1283 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1284 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1285 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1286 if (num_active_bases) *num_active_bases = data->num_active_bases; 1287 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1288 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1289 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1290 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1291 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1292 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1293 if (num_output_components) *num_output_components = data->num_output_components; 1294 1295 return CEED_ERROR_SUCCESS; 1296 } 1297 1298 /** 1299 @brief Get CeedOperator CeedBasis data for assembly. 1300 1301 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1302 1303 @param[in] data CeedOperatorAssemblyData 1304 @param[out] num_active_bases Number of active bases, or NULL 1305 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1306 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1307 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1308 1309 @return An error code: 0 - success, otherwise - failure 1310 1311 @ref Backend 1312 **/ 1313 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1314 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1315 // Assemble B_in, B_out if needed 1316 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1317 CeedInt num_qpts; 1318 1319 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1320 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1321 CeedInt num_nodes; 1322 CeedScalar *B_in = NULL, *identity = NULL; 1323 bool has_eval_none = false; 1324 1325 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1326 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1327 1328 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1329 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1330 } 1331 if (has_eval_none) { 1332 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1333 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1334 identity[i * num_nodes + i] = 1.0; 1335 } 1336 } 1337 1338 for (CeedInt q = 0; q < num_qpts; q++) { 1339 for (CeedInt n = 0; n < num_nodes; n++) { 1340 CeedInt d_in = 0, q_comp_in; 1341 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1342 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1343 const CeedInt qq = data->num_eval_modes_in[b] * q; 1344 const CeedScalar *B = NULL; 1345 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1346 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1347 if (q_comp_in > 1) { 1348 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1349 else B = &B[(++d_in) * num_qpts * num_nodes]; 1350 } 1351 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1352 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1353 } 1354 } 1355 } 1356 if (identity) CeedCall(CeedFree(identity)); 1357 data->assembled_bases_in[b] = B_in; 1358 } 1359 } 1360 1361 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1362 CeedInt num_qpts; 1363 1364 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1365 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1366 CeedInt num_nodes; 1367 bool has_eval_none = false; 1368 CeedScalar *B_out = NULL, *identity = NULL; 1369 1370 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1371 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1372 1373 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1374 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1375 } 1376 if (has_eval_none) { 1377 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1378 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1379 identity[i * num_nodes + i] = 1.0; 1380 } 1381 } 1382 1383 for (CeedInt q = 0; q < num_qpts; q++) { 1384 for (CeedInt n = 0; n < num_nodes; n++) { 1385 CeedInt d_out = 0, q_comp_out; 1386 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1387 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1388 const CeedInt qq = data->num_eval_modes_out[b] * q; 1389 const CeedScalar *B = NULL; 1390 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1391 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1392 if (q_comp_out > 1) { 1393 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1394 else B = &B[(++d_out) * num_qpts * num_nodes]; 1395 } 1396 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1397 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1398 } 1399 } 1400 } 1401 if (identity) CeedCall(CeedFree(identity)); 1402 data->assembled_bases_out[b] = B_out; 1403 } 1404 } 1405 1406 // Pass out assembled data 1407 if (active_bases) *active_bases = data->active_bases; 1408 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1409 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1410 1411 return CEED_ERROR_SUCCESS; 1412 } 1413 1414 /** 1415 @brief Get CeedOperator CeedBasis data for assembly. 1416 1417 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1418 1419 @param[in] data CeedOperatorAssemblyData 1420 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1421 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1422 1423 @return An error code: 0 - success, otherwise - failure 1424 1425 @ref Backend 1426 **/ 1427 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1428 CeedElemRestriction **active_elem_rstrs) { 1429 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1430 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1431 1432 return CEED_ERROR_SUCCESS; 1433 } 1434 1435 /** 1436 @brief Destroy CeedOperatorAssemblyData 1437 1438 @param[in,out] data CeedOperatorAssemblyData to destroy 1439 1440 @return An error code: 0 - success, otherwise - failure 1441 1442 @ref Backend 1443 **/ 1444 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1445 if (!*data) { 1446 *data = NULL; 1447 return CEED_ERROR_SUCCESS; 1448 } 1449 CeedCall(CeedDestroy(&(*data)->ceed)); 1450 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1451 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1452 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1453 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1454 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1455 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1456 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1457 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1458 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1459 } 1460 CeedCall(CeedFree(&(*data)->active_bases)); 1461 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1462 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1463 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1464 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1465 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1466 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1467 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1468 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1469 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1470 1471 CeedCall(CeedFree(data)); 1472 return CEED_ERROR_SUCCESS; 1473 } 1474 1475 /// @} 1476 1477 /// ---------------------------------------------------------------------------- 1478 /// CeedOperator Public API 1479 /// ---------------------------------------------------------------------------- 1480 /// @addtogroup CeedOperatorUser 1481 /// @{ 1482 1483 /** 1484 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1485 1486 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1487 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1488 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1489 1490 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1491 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1492 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1493 1494 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1495 1496 @param[in] op CeedOperator to assemble CeedQFunction 1497 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1498 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1499 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1500 1501 @return An error code: 0 - success, otherwise - failure 1502 1503 @ref User 1504 **/ 1505 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1506 CeedCall(CeedOperatorCheckReady(op)); 1507 1508 if (op->LinearAssembleQFunction) { 1509 // Backend version 1510 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1511 } else { 1512 // Operator fallback 1513 CeedOperator op_fallback; 1514 1515 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1516 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1517 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1518 } 1519 return CEED_ERROR_SUCCESS; 1520 } 1521 1522 /** 1523 @brief Assemble CeedQFunction and store result internally. 1524 1525 Return copied references of stored data to the caller. 1526 Caller is responsible for ownership and destruction of the copied references. 1527 See also @ref CeedOperatorLinearAssembleQFunction 1528 1529 @param[in] op CeedOperator to assemble CeedQFunction 1530 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1531 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1532 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1533 1534 @return An error code: 0 - success, otherwise - failure 1535 1536 @ref User 1537 **/ 1538 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1539 CeedCall(CeedOperatorCheckReady(op)); 1540 1541 if (op->LinearAssembleQFunctionUpdate) { 1542 // Backend version 1543 bool qf_assembled_is_setup; 1544 CeedVector assembled_vec = NULL; 1545 CeedElemRestriction assembled_rstr = NULL; 1546 1547 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1548 if (qf_assembled_is_setup) { 1549 bool update_needed; 1550 1551 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1552 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1553 if (update_needed) { 1554 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1555 } 1556 } else { 1557 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1558 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1559 } 1560 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1561 1562 // Copy reference from internally held copy 1563 *assembled = NULL; 1564 *rstr = NULL; 1565 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1566 CeedCall(CeedVectorDestroy(&assembled_vec)); 1567 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1568 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1569 } else { 1570 // Operator fallback 1571 CeedOperator op_fallback; 1572 1573 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1574 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1575 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1576 } 1577 1578 return CEED_ERROR_SUCCESS; 1579 } 1580 1581 /** 1582 @brief Assemble the diagonal of a square linear CeedOperator 1583 1584 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1585 1586 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1587 1588 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1589 1590 @param[in] op CeedOperator to assemble CeedQFunction 1591 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1592 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1593 1594 @return An error code: 0 - success, otherwise - failure 1595 1596 @ref User 1597 **/ 1598 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1599 bool is_composite; 1600 CeedCall(CeedOperatorCheckReady(op)); 1601 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1602 1603 CeedSize input_size = 0, output_size = 0; 1604 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1605 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1606 1607 // Early exit for empty operator 1608 if (!is_composite) { 1609 CeedInt num_elem = 0; 1610 1611 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1612 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1613 } 1614 1615 if (op->LinearAssembleDiagonal) { 1616 // Backend version 1617 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1618 return CEED_ERROR_SUCCESS; 1619 } else if (op->LinearAssembleAddDiagonal) { 1620 // Backend version with zeroing first 1621 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1622 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1623 return CEED_ERROR_SUCCESS; 1624 } else { 1625 // Operator fallback 1626 CeedOperator op_fallback; 1627 1628 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1629 if (op_fallback) { 1630 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1631 return CEED_ERROR_SUCCESS; 1632 } 1633 } 1634 // Default interface implementation 1635 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1636 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1637 1638 return CEED_ERROR_SUCCESS; 1639 } 1640 1641 /** 1642 @brief Assemble the diagonal of a square linear CeedOperator 1643 1644 This sums into a CeedVector the diagonal of a linear CeedOperator. 1645 1646 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1647 1648 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1649 1650 @param[in] op CeedOperator to assemble CeedQFunction 1651 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1652 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1653 1654 @return An error code: 0 - success, otherwise - failure 1655 1656 @ref User 1657 **/ 1658 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1659 bool is_composite; 1660 CeedCall(CeedOperatorCheckReady(op)); 1661 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1662 1663 CeedSize input_size = 0, output_size = 0; 1664 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1665 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1666 1667 // Early exit for empty operator 1668 if (!is_composite) { 1669 CeedInt num_elem = 0; 1670 1671 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1672 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1673 } 1674 1675 if (op->LinearAssembleAddDiagonal) { 1676 // Backend version 1677 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1678 return CEED_ERROR_SUCCESS; 1679 } else { 1680 // Operator fallback 1681 CeedOperator op_fallback; 1682 1683 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1684 if (op_fallback) { 1685 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1686 return CEED_ERROR_SUCCESS; 1687 } 1688 } 1689 // Default interface implementation 1690 if (is_composite) { 1691 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1692 } else { 1693 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1694 } 1695 1696 return CEED_ERROR_SUCCESS; 1697 } 1698 1699 /** 1700 @brief Assemble the point block diagonal of a square linear CeedOperator 1701 1702 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1703 1704 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1705 1706 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1707 1708 @param[in] op CeedOperator to assemble CeedQFunction 1709 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1710 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1711 component in]. 1712 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1713 1714 @return An error code: 0 - success, otherwise - failure 1715 1716 @ref User 1717 **/ 1718 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1719 bool is_composite; 1720 CeedCall(CeedOperatorCheckReady(op)); 1721 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1722 1723 CeedSize input_size = 0, output_size = 0; 1724 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1725 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1726 1727 // Early exit for empty operator 1728 if (!is_composite) { 1729 CeedInt num_elem = 0; 1730 1731 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1732 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1733 } 1734 1735 if (op->LinearAssemblePointBlockDiagonal) { 1736 // Backend version 1737 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1738 return CEED_ERROR_SUCCESS; 1739 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1740 // Backend version with zeroing first 1741 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1742 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1743 return CEED_ERROR_SUCCESS; 1744 } else { 1745 // Operator fallback 1746 CeedOperator op_fallback; 1747 1748 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1749 if (op_fallback) { 1750 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1751 return CEED_ERROR_SUCCESS; 1752 } 1753 } 1754 // Default interface implementation 1755 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1756 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1757 1758 return CEED_ERROR_SUCCESS; 1759 } 1760 1761 /** 1762 @brief Assemble the point block diagonal of a square linear CeedOperator 1763 1764 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1765 1766 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1767 1768 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1769 1770 @param[in] op CeedOperator to assemble CeedQFunction 1771 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1772 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1773 component in]. 1774 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1775 1776 @return An error code: 0 - success, otherwise - failure 1777 1778 @ref User 1779 **/ 1780 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1781 bool is_composite; 1782 CeedCall(CeedOperatorCheckReady(op)); 1783 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1784 1785 CeedSize input_size = 0, output_size = 0; 1786 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1787 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1788 1789 // Early exit for empty operator 1790 if (!is_composite) { 1791 CeedInt num_elem = 0; 1792 1793 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1794 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1795 } 1796 1797 if (op->LinearAssembleAddPointBlockDiagonal) { 1798 // Backend version 1799 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1800 return CEED_ERROR_SUCCESS; 1801 } else { 1802 // Operator fallback 1803 CeedOperator op_fallback; 1804 1805 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1806 if (op_fallback) { 1807 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1808 return CEED_ERROR_SUCCESS; 1809 } 1810 } 1811 // Default interface implementation 1812 if (is_composite) { 1813 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1814 } else { 1815 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1816 } 1817 1818 return CEED_ERROR_SUCCESS; 1819 } 1820 1821 /** 1822 @brief Fully assemble the nonzero pattern of a linear operator. 1823 1824 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1825 1826 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1827 matrix in entry (i, j). 1828 Note that the (i, j) pairs are not unique and may repeat. 1829 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1830 1831 This will generally be slow unless your operator is low-order. 1832 1833 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1834 1835 @param[in] op CeedOperator to assemble 1836 @param[out] num_entries Number of entries in coordinate nonzero pattern 1837 @param[out] rows Row number for each entry 1838 @param[out] cols Column number for each entry 1839 1840 @ref User 1841 **/ 1842 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1843 CeedInt num_suboperators, single_entries; 1844 CeedOperator *sub_operators; 1845 bool is_composite; 1846 CeedCall(CeedOperatorCheckReady(op)); 1847 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1848 1849 if (op->LinearAssembleSymbolic) { 1850 // Backend version 1851 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1852 return CEED_ERROR_SUCCESS; 1853 } else { 1854 // Operator fallback 1855 CeedOperator op_fallback; 1856 1857 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1858 if (op_fallback) { 1859 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1860 return CEED_ERROR_SUCCESS; 1861 } 1862 } 1863 1864 // Default interface implementation 1865 1866 // count entries and allocate rows, cols arrays 1867 *num_entries = 0; 1868 if (is_composite) { 1869 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1870 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1871 for (CeedInt k = 0; k < num_suboperators; ++k) { 1872 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1873 *num_entries += single_entries; 1874 } 1875 } else { 1876 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1877 *num_entries += single_entries; 1878 } 1879 CeedCall(CeedCalloc(*num_entries, rows)); 1880 CeedCall(CeedCalloc(*num_entries, cols)); 1881 1882 // assemble nonzero locations 1883 CeedInt offset = 0; 1884 if (is_composite) { 1885 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1886 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1887 for (CeedInt k = 0; k < num_suboperators; ++k) { 1888 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1889 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1890 offset += single_entries; 1891 } 1892 } else { 1893 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1894 } 1895 1896 return CEED_ERROR_SUCCESS; 1897 } 1898 1899 /** 1900 @brief Fully assemble the nonzero entries of a linear operator. 1901 1902 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1903 1904 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1905 matrix in entry (i, j). 1906 Note that the (i, j) pairs are not unique and may repeat. 1907 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1908 1909 This will generally be slow unless your operator is low-order. 1910 1911 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1912 1913 @param[in] op CeedOperator to assemble 1914 @param[out] values Values to assemble into matrix 1915 1916 @ref User 1917 **/ 1918 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1919 CeedInt num_suboperators, single_entries = 0; 1920 CeedOperator *sub_operators; 1921 bool is_composite; 1922 CeedCall(CeedOperatorCheckReady(op)); 1923 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1924 1925 // Early exit for empty operator 1926 if (!is_composite) { 1927 CeedInt num_elem = 0; 1928 1929 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1930 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1931 } 1932 1933 if (op->LinearAssemble) { 1934 // Backend version 1935 CeedCall(op->LinearAssemble(op, values)); 1936 return CEED_ERROR_SUCCESS; 1937 } else { 1938 // Operator fallback 1939 CeedOperator op_fallback; 1940 1941 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1942 if (op_fallback) { 1943 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 1944 return CEED_ERROR_SUCCESS; 1945 } 1946 } 1947 1948 // Default interface implementation 1949 CeedInt offset = 0; 1950 CeedCall(CeedVectorSetValue(values, 0.0)); 1951 if (is_composite) { 1952 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1953 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1954 for (CeedInt k = 0; k < num_suboperators; k++) { 1955 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 1956 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1957 offset += single_entries; 1958 } 1959 } else { 1960 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 1961 } 1962 1963 return CEED_ERROR_SUCCESS; 1964 } 1965 1966 /** 1967 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 1968 1969 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1970 1971 @param[in] op Composite CeedOperator 1972 @param[in] num_skip_indices Number of suboperators to skip 1973 @param[in] skip_indices Array of indices of suboperators to skip 1974 @param[out] mult Vector to store multiplicity (of size l_size) 1975 1976 @return An error code: 0 - success, otherwise - failure 1977 1978 @ref User 1979 **/ 1980 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 1981 CeedCall(CeedOperatorCheckReady(op)); 1982 1983 Ceed ceed; 1984 CeedInt num_suboperators; 1985 CeedSize l_vec_len; 1986 CeedScalar *mult_array; 1987 CeedVector ones_l_vec; 1988 CeedElemRestriction elem_rstr; 1989 CeedOperator *sub_operators; 1990 1991 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1992 1993 // Zero mult vector 1994 CeedCall(CeedVectorSetValue(mult, 0.0)); 1995 1996 // Get suboperators 1997 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1998 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1999 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2000 2001 // Work vector 2002 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2003 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2004 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2005 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2006 2007 // Compute multiplicity across suboperators 2008 for (CeedInt i = 0; i < num_suboperators; i++) { 2009 const CeedScalar *sub_mult_array; 2010 CeedVector sub_mult_l_vec, ones_e_vec; 2011 2012 // -- Check for suboperator to skip 2013 for (CeedInt j = 0; j < num_skip_indices; j++) { 2014 if (skip_indices[j] == i) continue; 2015 } 2016 2017 // -- Sub operator multiplicity 2018 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2019 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2020 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2021 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2022 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2023 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2024 // ---- Flag every node present in the current suboperator 2025 for (CeedInt j = 0; j < l_vec_len; j++) { 2026 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2027 } 2028 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2029 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2030 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2031 } 2032 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2033 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2034 2035 return CEED_ERROR_SUCCESS; 2036 } 2037 2038 /** 2039 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2040 grid interpolation 2041 2042 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2043 2044 @param[in] op_fine Fine grid operator 2045 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2046 @param[in] rstr_coarse Coarse grid restriction 2047 @param[in] basis_coarse Coarse grid active vector basis 2048 @param[out] op_coarse Coarse grid operator 2049 @param[out] op_prolong Coarse to fine operator, or NULL 2050 @param[out] op_restrict Fine to coarse operator, or NULL 2051 2052 @return An error code: 0 - success, otherwise - failure 2053 2054 @ref User 2055 **/ 2056 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2057 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2058 CeedCall(CeedOperatorCheckReady(op_fine)); 2059 2060 // Build prolongation matrix, if required 2061 CeedBasis basis_c_to_f = NULL; 2062 if (op_prolong || op_restrict) { 2063 CeedBasis basis_fine; 2064 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2065 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2066 } 2067 2068 // Core code 2069 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2070 2071 return CEED_ERROR_SUCCESS; 2072 } 2073 2074 /** 2075 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2076 2077 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2078 2079 @param[in] op_fine Fine grid operator 2080 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2081 @param[in] rstr_coarse Coarse grid restriction 2082 @param[in] basis_coarse Coarse grid active vector basis 2083 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2084 @param[out] op_coarse Coarse grid operator 2085 @param[out] op_prolong Coarse to fine operator, or NULL 2086 @param[out] op_restrict Fine to coarse operator, or NULL 2087 2088 @return An error code: 0 - success, otherwise - failure 2089 2090 @ref User 2091 **/ 2092 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2093 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2094 CeedOperator *op_restrict) { 2095 CeedCall(CeedOperatorCheckReady(op_fine)); 2096 Ceed ceed; 2097 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2098 2099 // Check for compatible quadrature spaces 2100 CeedBasis basis_fine; 2101 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2102 CeedInt Q_f, Q_c; 2103 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2104 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2105 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2106 2107 // Create coarse to fine basis, if required 2108 CeedBasis basis_c_to_f = NULL; 2109 if (op_prolong || op_restrict) { 2110 // Check if interpolation matrix is provided 2111 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2112 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2113 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2114 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2115 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2116 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2117 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2118 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2119 CeedScalar *q_ref, *q_weight, *grad; 2120 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2121 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2122 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2123 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2124 CeedCall(CeedFree(&q_ref)); 2125 CeedCall(CeedFree(&q_weight)); 2126 CeedCall(CeedFree(&grad)); 2127 } 2128 2129 // Core code 2130 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2131 return CEED_ERROR_SUCCESS; 2132 } 2133 2134 /** 2135 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2136 2137 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2138 2139 @param[in] op_fine Fine grid operator 2140 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2141 @param[in] rstr_coarse Coarse grid restriction 2142 @param[in] basis_coarse Coarse grid active vector basis 2143 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2144 @param[out] op_coarse Coarse grid operator 2145 @param[out] op_prolong Coarse to fine operator, or NULL 2146 @param[out] op_restrict Fine to coarse operator, or NULL 2147 2148 @return An error code: 0 - success, otherwise - failure 2149 2150 @ref User 2151 **/ 2152 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2153 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2154 CeedOperator *op_restrict) { 2155 CeedCall(CeedOperatorCheckReady(op_fine)); 2156 Ceed ceed; 2157 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2158 2159 // Check for compatible quadrature spaces 2160 CeedBasis basis_fine; 2161 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2162 CeedInt Q_f, Q_c; 2163 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2164 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2165 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2166 2167 // Coarse to fine basis 2168 CeedBasis basis_c_to_f = NULL; 2169 if (op_prolong || op_restrict) { 2170 // Check if interpolation matrix is provided 2171 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2172 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2173 CeedElemTopology topo; 2174 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2175 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2176 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2177 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2178 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2179 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2180 CeedScalar *q_ref, *q_weight, *grad; 2181 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2182 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2183 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2184 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2185 CeedCall(CeedFree(&q_ref)); 2186 CeedCall(CeedFree(&q_weight)); 2187 CeedCall(CeedFree(&grad)); 2188 } 2189 2190 // Core code 2191 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2192 return CEED_ERROR_SUCCESS; 2193 } 2194 2195 /** 2196 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2197 2198 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2199 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2200 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2201 \hat S V\f$. 2202 The CeedOperator must be linear and non-composite. 2203 The associated CeedQFunction must therefore also be linear. 2204 2205 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2206 2207 @param[in] op CeedOperator to create element inverses 2208 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2209 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2210 2211 @return An error code: 0 - success, otherwise - failure 2212 2213 @ref User 2214 **/ 2215 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2216 CeedCall(CeedOperatorCheckReady(op)); 2217 2218 if (op->CreateFDMElementInverse) { 2219 // Backend version 2220 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2221 return CEED_ERROR_SUCCESS; 2222 } else { 2223 // Operator fallback 2224 CeedOperator op_fallback; 2225 2226 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2227 if (op_fallback) { 2228 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2229 return CEED_ERROR_SUCCESS; 2230 } 2231 } 2232 2233 // Default interface implementation 2234 Ceed ceed, ceed_parent; 2235 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2236 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2237 ceed_parent = ceed_parent ? ceed_parent : ceed; 2238 CeedQFunction qf; 2239 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2240 2241 // Determine active input basis 2242 bool interp = false, grad = false; 2243 CeedBasis basis = NULL; 2244 CeedElemRestriction rstr = NULL; 2245 CeedOperatorField *op_fields; 2246 CeedQFunctionField *qf_fields; 2247 CeedInt num_input_fields; 2248 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2249 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2250 for (CeedInt i = 0; i < num_input_fields; i++) { 2251 CeedVector vec; 2252 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2253 if (vec == CEED_VECTOR_ACTIVE) { 2254 CeedEvalMode eval_mode; 2255 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2256 interp = interp || eval_mode == CEED_EVAL_INTERP; 2257 grad = grad || eval_mode == CEED_EVAL_GRAD; 2258 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2259 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2260 } 2261 } 2262 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2263 CeedSize l_size = 1; 2264 CeedInt P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2265 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2266 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2267 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2268 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2269 CeedCall(CeedBasisGetDimension(basis, &dim)); 2270 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2271 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2272 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2273 2274 // Build and diagonalize 1D Mass and Laplacian 2275 bool is_tensor_basis; 2276 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2277 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2278 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2279 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2280 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2281 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2282 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2283 CeedCall(CeedCalloc(P_1d, &lambda)); 2284 // -- Build matrices 2285 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2286 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2287 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2288 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2289 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2290 2291 // -- Diagonalize 2292 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2293 CeedCall(CeedFree(&mass)); 2294 CeedCall(CeedFree(&laplace)); 2295 for (CeedInt i = 0; i < P_1d; i++) { 2296 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2297 } 2298 CeedCall(CeedFree(&x)); 2299 2300 // Assemble QFunction 2301 CeedVector assembled; 2302 CeedElemRestriction rstr_qf; 2303 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2304 CeedInt layout[3]; 2305 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2306 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2307 CeedScalar max_norm = 0; 2308 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2309 2310 // Calculate element averages 2311 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2312 CeedScalar *elem_avg; 2313 const CeedScalar *assembled_array, *q_weight_array; 2314 CeedVector q_weight; 2315 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2316 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2317 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2318 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2319 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2320 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2321 for (CeedInt e = 0; e < num_elem; e++) { 2322 CeedInt count = 0; 2323 for (CeedInt q = 0; q < num_qpts; q++) { 2324 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2325 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2326 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2327 count++; 2328 } 2329 } 2330 } 2331 if (count) { 2332 elem_avg[e] /= count; 2333 } else { 2334 elem_avg[e] = 1.0; 2335 } 2336 } 2337 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2338 CeedCall(CeedVectorDestroy(&assembled)); 2339 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2340 CeedCall(CeedVectorDestroy(&q_weight)); 2341 2342 // Build FDM diagonal 2343 CeedVector q_data; 2344 CeedScalar *q_data_array, *fdm_diagonal; 2345 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2346 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2347 for (CeedInt c = 0; c < num_comp; c++) { 2348 for (CeedInt n = 0; n < num_nodes; n++) { 2349 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2350 if (grad) { 2351 for (CeedInt d = 0; d < dim; d++) { 2352 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2353 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2354 } 2355 } 2356 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2357 } 2358 } 2359 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2360 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2361 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2362 for (CeedInt e = 0; e < num_elem; e++) { 2363 for (CeedInt c = 0; c < num_comp; c++) { 2364 for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2365 } 2366 } 2367 CeedCall(CeedFree(&elem_avg)); 2368 CeedCall(CeedFree(&fdm_diagonal)); 2369 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2370 2371 // Setup FDM operator 2372 // -- Basis 2373 CeedBasis fdm_basis; 2374 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2375 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2376 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2377 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2378 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2379 CeedCall(CeedFree(&fdm_interp)); 2380 CeedCall(CeedFree(&grad_dummy)); 2381 CeedCall(CeedFree(&q_ref_dummy)); 2382 CeedCall(CeedFree(&q_weight_dummy)); 2383 CeedCall(CeedFree(&lambda)); 2384 2385 // -- Restriction 2386 CeedElemRestriction rstr_qd_i; 2387 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2388 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2389 // -- QFunction 2390 CeedQFunction qf_fdm; 2391 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2392 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2393 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2394 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2395 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2396 // -- QFunction context 2397 CeedInt *num_comp_data; 2398 CeedCall(CeedCalloc(1, &num_comp_data)); 2399 num_comp_data[0] = num_comp; 2400 CeedQFunctionContext ctx_fdm; 2401 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2402 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2403 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2404 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2405 // -- Operator 2406 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2407 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2408 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2409 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2410 2411 // Cleanup 2412 CeedCall(CeedVectorDestroy(&q_data)); 2413 CeedCall(CeedBasisDestroy(&fdm_basis)); 2414 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2415 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2416 2417 return CEED_ERROR_SUCCESS; 2418 } 2419 2420 /// @} 2421