1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] basis CeedBasis from which to get the basis matrix 184 @param[in] eval_mode Current basis evaluation mode 185 @param[in] identity Pointer to identity matrix 186 @param[out] basis_ptr Basis pointer to set 187 188 @ref Developer 189 **/ 190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 191 switch (eval_mode) { 192 case CEED_EVAL_NONE: 193 *basis_ptr = identity; 194 break; 195 case CEED_EVAL_INTERP: 196 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 197 break; 198 case CEED_EVAL_GRAD: 199 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 200 break; 201 case CEED_EVAL_DIV: 202 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 203 break; 204 case CEED_EVAL_CURL: 205 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 206 break; 207 case CEED_EVAL_WEIGHT: 208 break; // Caught by QF Assembly 209 } 210 assert(*basis_ptr != NULL); 211 212 return CEED_ERROR_SUCCESS; 213 } 214 215 /** 216 @brief Create point block restriction for active operator field 217 218 @param[in] rstr Original CeedElemRestriction for active field 219 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 220 221 @return An error code: 0 - success, otherwise - failure 222 223 @ref Developer 224 **/ 225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 226 Ceed ceed; 227 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 228 const CeedInt *offsets; 229 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 230 231 // Expand offsets 232 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 233 CeedSize l_size; 234 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 235 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 236 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 237 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 238 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 239 CeedInt shift = num_comp; 240 if (comp_stride != 1) shift *= num_comp; 241 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 242 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 243 pointblock_offsets[i] = offsets[i] * shift; 244 } 245 246 // Create new restriction 247 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 248 pointblock_offsets, pointblock_rstr)); 249 250 // Cleanup 251 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 252 253 return CEED_ERROR_SUCCESS; 254 } 255 256 /** 257 @brief Core logic for assembling operator diagonal or point block diagonal 258 259 @param[in] op CeedOperator to assemble point block diagonal 260 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 261 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 262 @param[out] assembled CeedVector to store assembled diagonal 263 264 @return An error code: 0 - success, otherwise - failure 265 266 @ref Developer 267 **/ 268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 269 Ceed ceed; 270 CeedCall(CeedOperatorGetCeed(op, &ceed)); 271 272 // Assemble QFunction 273 CeedQFunction qf; 274 const CeedScalar *assembled_qf_array; 275 CeedVector assembled_qf = NULL; 276 CeedElemRestriction assembled_elem_rstr = NULL; 277 CeedInt num_input_fields, num_output_fields; 278 CeedInt layout[3]; 279 280 CeedCall(CeedOperatorGetQFunction(op, &qf)); 281 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 282 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 283 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 284 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 285 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 286 287 // Get assembly data 288 CeedOperatorAssemblyData data; 289 const CeedEvalMode **eval_modes_in, **eval_modes_out; 290 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 291 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 292 CeedBasis *active_bases; 293 CeedElemRestriction *active_elem_rstrs; 294 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 295 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 296 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 297 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 298 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 299 300 // Loop over all active bases 301 for (CeedInt b = 0; b < num_active_bases; b++) { 302 // Assemble point block diagonal restriction, if needed 303 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 304 305 if (is_pointblock) { 306 CeedElemRestriction point_block_elem_rstr; 307 308 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 309 diag_elem_rstr = point_block_elem_rstr; 310 } 311 312 // Create diagonal vector 313 CeedVector elem_diag; 314 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 315 316 // Assemble element operator diagonals 317 CeedScalar *elem_diag_array; 318 CeedInt num_elem, num_nodes, num_qpts, num_components; 319 320 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 321 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 322 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 323 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 324 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 325 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 326 327 // Construct identity matrix for basis if required 328 bool has_eval_none = false; 329 CeedScalar *identity = NULL; 330 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 331 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 332 } 333 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 334 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 335 } 336 if (has_eval_none) { 337 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 338 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 339 } 340 341 // Compute the diagonal of B^T D B 342 // Each element 343 for (CeedInt e = 0; e < num_elem; e++) { 344 // Each basis eval mode pair 345 CeedInt d_out = 0, q_comp_out; 346 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 347 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 348 const CeedScalar *B_t = NULL; 349 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 350 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 351 if (q_comp_out > 1) { 352 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 353 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 354 } 355 eval_mode_out_prev = eval_modes_out[b][e_out]; 356 357 CeedInt d_in = 0, q_comp_in; 358 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 359 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 360 const CeedScalar *B = NULL; 361 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 362 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 363 if (q_comp_in > 1) { 364 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 365 else B = &B[(++d_in) * num_qpts * num_nodes]; 366 } 367 eval_mode_in_prev = eval_modes_in[b][e_in]; 368 369 // Each component 370 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 371 // Each qpt/node pair 372 for (CeedInt q = 0; q < num_qpts; q++) { 373 if (is_pointblock) { 374 // Point Block Diagonal 375 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 376 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 377 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 378 for (CeedInt n = 0; n < num_nodes; n++) { 379 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 380 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 381 } 382 } 383 } else { 384 // Diagonal Only 385 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 386 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 387 for (CeedInt n = 0; n < num_nodes; n++) { 388 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 389 } 390 } 391 } 392 } 393 } 394 } 395 } 396 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 397 398 // Assemble local operator diagonal 399 CeedCall(CeedElemRestrictionApplyUnsigned(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 400 401 // Cleanup 402 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 403 CeedCall(CeedVectorDestroy(&elem_diag)); 404 CeedCall(CeedFree(&identity)); 405 } 406 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 407 CeedCall(CeedVectorDestroy(&assembled_qf)); 408 409 return CEED_ERROR_SUCCESS; 410 } 411 412 /** 413 @brief Core logic for assembling composite operator diagonal 414 415 @param[in] op CeedOperator to assemble point block diagonal 416 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 417 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 418 @param[out] assembled CeedVector to store assembled diagonal 419 420 @return An error code: 0 - success, otherwise - failure 421 422 @ref Developer 423 **/ 424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 CeedInt num_sub; 427 CeedOperator *suboperators; 428 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 429 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 430 for (CeedInt i = 0; i < num_sub; i++) { 431 if (is_pointblock) { 432 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 433 } else { 434 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 435 } 436 } 437 return CEED_ERROR_SUCCESS; 438 } 439 440 /** 441 @brief Build nonzero pattern for non-composite operator 442 443 Users should generally use CeedOperatorLinearAssembleSymbolic() 444 445 Note: For operators using oriented element restrictions, entries in rows or cols may be negative indicating the assembled value at this nonzero 446 should be negated 447 448 @param[in] op CeedOperator to assemble nonzero pattern 449 @param[in] offset Offset for number of entries 450 @param[out] rows Row number for each entry 451 @param[out] cols Column number for each entry 452 453 @return An error code: 0 - success, otherwise - failure 454 455 @ref Developer 456 **/ 457 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 458 Ceed ceed; 459 bool is_composite; 460 CeedCall(CeedOperatorGetCeed(op, &ceed)); 461 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 462 463 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 464 465 CeedSize num_nodes; 466 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 467 CeedElemRestriction rstr_in; 468 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 469 CeedInt num_elem, elem_size, num_comp; 470 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 471 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 472 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 473 CeedInt layout_er[3]; 474 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 475 476 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 477 478 // Determine elem_dof relation 479 CeedVector index_vec; 480 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 481 CeedScalar *array; 482 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 483 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 484 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 485 CeedVector elem_dof; 486 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 487 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 488 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 489 const CeedScalar *elem_dof_a; 490 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 491 CeedCall(CeedVectorDestroy(&index_vec)); 492 493 // Determine i, j locations for element matrices 494 CeedInt count = 0; 495 for (CeedInt e = 0; e < num_elem; e++) { 496 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 497 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 498 for (CeedInt i = 0; i < elem_size; i++) { 499 for (CeedInt j = 0; j < elem_size; j++) { 500 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 501 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 502 503 const CeedInt row = elem_dof_a[elem_dof_index_row]; 504 const CeedInt col = elem_dof_a[elem_dof_index_col]; 505 506 rows[offset + count] = row; 507 cols[offset + count] = col; 508 count++; 509 } 510 } 511 } 512 } 513 } 514 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 515 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 516 CeedCall(CeedVectorDestroy(&elem_dof)); 517 518 return CEED_ERROR_SUCCESS; 519 } 520 521 /** 522 @brief Assemble nonzero entries for non-composite operator 523 524 Users should generally use CeedOperatorLinearAssemble() 525 526 @param[in] op CeedOperator to assemble 527 @param[in] offset Offset for number of entries 528 @param[out] values Values to assemble into matrix 529 530 @return An error code: 0 - success, otherwise - failure 531 532 @ref Developer 533 **/ 534 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 535 Ceed ceed; 536 bool is_composite; 537 CeedCall(CeedOperatorGetCeed(op, &ceed)); 538 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 539 540 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 541 542 // Early exit for empty operator 543 { 544 CeedInt num_elem = 0; 545 546 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 547 if (num_elem == 0) return CEED_ERROR_SUCCESS; 548 } 549 550 if (op->LinearAssembleSingle) { 551 // Backend version 552 CeedCall(op->LinearAssembleSingle(op, offset, values)); 553 return CEED_ERROR_SUCCESS; 554 } else { 555 // Operator fallback 556 CeedOperator op_fallback; 557 558 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 559 if (op_fallback) { 560 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 561 return CEED_ERROR_SUCCESS; 562 } 563 } 564 565 // Assemble QFunction 566 CeedQFunction qf; 567 CeedCall(CeedOperatorGetQFunction(op, &qf)); 568 CeedVector assembled_qf = NULL; 569 CeedElemRestriction rstr_q = NULL; 570 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 571 CeedSize qf_length; 572 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 573 574 CeedInt num_input_fields, num_output_fields; 575 CeedOperatorField *input_fields; 576 CeedOperatorField *output_fields; 577 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 578 579 // Get assembly data 580 CeedOperatorAssemblyData data; 581 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 582 const CeedEvalMode **eval_modes_in, **eval_modes_out; 583 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 584 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 585 &eval_modes_out, NULL, NULL)); 586 CeedBasis *bases; 587 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 588 CeedBasis basis_in = bases[0]; 589 590 CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 591 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 592 593 CeedElemRestriction active_rstr; 594 CeedInt num_elem, elem_size, num_qpts, num_comp; 595 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 596 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 597 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 598 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 599 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 600 601 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 602 603 // loop over elements and put in data structure 604 const CeedScalar *assembled_qf_array; 605 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 606 607 CeedInt layout_qf[3]; 608 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 609 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 610 611 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 612 const CeedScalar **B_mats_in, **B_mats_out; 613 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 614 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 615 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 616 CeedScalar elem_mat[elem_size * elem_size]; 617 CeedInt count = 0; 618 CeedScalar *vals; 619 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 620 for (CeedInt e = 0; e < num_elem; e++) { 621 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 622 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 623 // Compute B^T*D 624 for (CeedInt n = 0; n < elem_size; n++) { 625 for (CeedInt q = 0; q < num_qpts; q++) { 626 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 627 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 628 CeedScalar sum = 0.0; 629 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 630 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 631 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 632 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 633 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 634 } 635 BTD_mat[btd_index] = sum; 636 } 637 } 638 } 639 // form element matrix itself (for each block component) 640 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 641 642 // put element matrix in coordinate data structure 643 for (CeedInt i = 0; i < elem_size; i++) { 644 for (CeedInt j = 0; j < elem_size; j++) { 645 vals[offset + count] = elem_mat[i * elem_size + j]; 646 count++; 647 } 648 } 649 } 650 } 651 } 652 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 653 CeedCall(CeedVectorRestoreArray(values, &vals)); 654 655 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 656 CeedCall(CeedVectorDestroy(&assembled_qf)); 657 658 return CEED_ERROR_SUCCESS; 659 } 660 661 /** 662 @brief Count number of entries for assembled CeedOperator 663 664 @param[in] op CeedOperator to assemble 665 @param[out] num_entries Number of entries in assembled representation 666 667 @return An error code: 0 - success, otherwise - failure 668 669 @ref Utility 670 **/ 671 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 672 bool is_composite; 673 CeedElemRestriction rstr; 674 CeedInt num_elem, elem_size, num_comp; 675 676 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 677 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 678 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 679 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 680 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 681 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 682 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 683 684 return CEED_ERROR_SUCCESS; 685 } 686 687 /** 688 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 689 690 @param[in] op_fine Fine grid operator 691 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 692 @param[in] rstr_coarse Coarse grid restriction 693 @param[in] basis_coarse Coarse grid active vector basis 694 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 695 @param[out] op_coarse Coarse grid operator 696 @param[out] op_prolong Coarse to fine operator, or NULL 697 @param[out] op_restrict Fine to coarse operator, or NULL 698 699 @return An error code: 0 - success, otherwise - failure 700 701 @ref Developer 702 **/ 703 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 704 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 705 Ceed ceed; 706 CeedVector mult_vec = NULL; 707 CeedElemRestriction rstr_p_mult_fine = NULL; 708 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 709 710 // Check for composite operator 711 bool is_composite; 712 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 713 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 714 715 // Coarse Grid 716 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 717 CeedElemRestriction rstr_fine = NULL; 718 // -- Clone input fields 719 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 720 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 721 rstr_fine = op_fine->input_fields[i]->elem_rstr; 722 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 723 } else { 724 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 725 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 726 } 727 } 728 // -- Clone output fields 729 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 730 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 731 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 732 } else { 733 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 734 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 735 } 736 } 737 // -- Clone QFunctionAssemblyData 738 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 739 740 // Multiplicity vector 741 if (op_restrict || op_prolong) { 742 CeedVector mult_e_vec; 743 744 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 745 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 746 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 747 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 748 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 749 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 750 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 751 CeedCall(CeedVectorDestroy(&mult_e_vec)); 752 CeedCall(CeedVectorReciprocal(mult_vec)); 753 } 754 755 // Clone name 756 bool has_name = op_fine->name; 757 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 758 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 759 760 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 761 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 762 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 763 764 // Restriction/Prolongation Operators 765 CeedInt num_comp; 766 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 767 768 // Restriction 769 if (op_restrict) { 770 CeedInt *num_comp_r_data; 771 CeedQFunction qf_restrict; 772 CeedQFunctionContext ctx_r; 773 774 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 775 CeedCall(CeedCalloc(1, &num_comp_r_data)); 776 num_comp_r_data[0] = num_comp; 777 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 778 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 779 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 780 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 781 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 782 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 783 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 784 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 785 786 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 787 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 788 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 789 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 790 791 // Set name 792 char *restriction_name; 793 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 794 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 795 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 796 CeedCall(CeedFree(&restriction_name)); 797 798 // Check 799 CeedCall(CeedOperatorCheckReady(*op_restrict)); 800 801 // Cleanup 802 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 803 } 804 805 // Prolongation 806 if (op_prolong) { 807 CeedInt *num_comp_p_data; 808 CeedQFunction qf_prolong; 809 CeedQFunctionContext ctx_p; 810 811 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 812 CeedCall(CeedCalloc(1, &num_comp_p_data)); 813 num_comp_p_data[0] = num_comp; 814 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 815 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 816 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 817 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 818 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 819 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 820 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 821 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 822 823 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 824 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 825 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 826 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 827 828 // Set name 829 char *prolongation_name; 830 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 831 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 832 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 833 CeedCall(CeedFree(&prolongation_name)); 834 835 // Check 836 CeedCall(CeedOperatorCheckReady(*op_prolong)); 837 838 // Cleanup 839 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 840 } 841 842 // Check 843 CeedCall(CeedOperatorCheckReady(*op_coarse)); 844 845 // Cleanup 846 CeedCall(CeedVectorDestroy(&mult_vec)); 847 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 848 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 849 850 return CEED_ERROR_SUCCESS; 851 } 852 853 /** 854 @brief Build 1D mass matrix and Laplacian with perturbation 855 856 @param[in] interp_1d Interpolation matrix in one dimension 857 @param[in] grad_1d Gradient matrix in one dimension 858 @param[in] q_weight_1d Quadrature weights in one dimension 859 @param[in] P_1d Number of basis nodes in one dimension 860 @param[in] Q_1d Number of quadrature points in one dimension 861 @param[in] dim Dimension of basis 862 @param[out] mass Assembled mass matrix in one dimension 863 @param[out] laplace Assembled perturbed Laplacian in one dimension 864 865 @return An error code: 0 - success, otherwise - failure 866 867 @ref Developer 868 **/ 869 CeedPragmaOptimizeOff 870 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 871 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 872 for (CeedInt i = 0; i < P_1d; i++) { 873 for (CeedInt j = 0; j < P_1d; j++) { 874 CeedScalar sum = 0.0; 875 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 876 mass[i + j * P_1d] = sum; 877 } 878 } 879 // -- Laplacian 880 for (CeedInt i = 0; i < P_1d; i++) { 881 for (CeedInt j = 0; j < P_1d; j++) { 882 CeedScalar sum = 0.0; 883 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 884 laplace[i + j * P_1d] = sum; 885 } 886 } 887 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 888 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 889 return CEED_ERROR_SUCCESS; 890 } 891 CeedPragmaOptimizeOn 892 893 /// @} 894 895 /// ---------------------------------------------------------------------------- 896 /// CeedOperator Backend API 897 /// ---------------------------------------------------------------------------- 898 /// @addtogroup CeedOperatorBackend 899 /// @{ 900 901 /** 902 @brief Create object holding CeedQFunction assembly data for CeedOperator 903 904 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 905 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 906 907 @return An error code: 0 - success, otherwise - failure 908 909 @ref Backend 910 **/ 911 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 912 CeedCall(CeedCalloc(1, data)); 913 (*data)->ref_count = 1; 914 (*data)->ceed = ceed; 915 CeedCall(CeedReference(ceed)); 916 917 return CEED_ERROR_SUCCESS; 918 } 919 920 /** 921 @brief Increment the reference counter for a CeedQFunctionAssemblyData 922 923 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 924 925 @return An error code: 0 - success, otherwise - failure 926 927 @ref Backend 928 **/ 929 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 930 data->ref_count++; 931 return CEED_ERROR_SUCCESS; 932 } 933 934 /** 935 @brief Set re-use of CeedQFunctionAssemblyData 936 937 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 938 @param[in] reuse_data Boolean flag indicating data re-use 939 940 @return An error code: 0 - success, otherwise - failure 941 942 @ref Backend 943 **/ 944 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 945 data->reuse_data = reuse_data; 946 data->needs_data_update = true; 947 return CEED_ERROR_SUCCESS; 948 } 949 950 /** 951 @brief Mark QFunctionAssemblyData as stale 952 953 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 954 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 955 956 @return An error code: 0 - success, otherwise - failure 957 958 @ref Backend 959 **/ 960 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 961 data->needs_data_update = needs_data_update; 962 return CEED_ERROR_SUCCESS; 963 } 964 965 /** 966 @brief Determine if QFunctionAssemblyData needs update 967 968 @param[in] data CeedQFunctionAssemblyData to mark as stale 969 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 970 971 @return An error code: 0 - success, otherwise - failure 972 973 @ref Backend 974 **/ 975 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 976 *is_update_needed = !data->reuse_data || data->needs_data_update; 977 return CEED_ERROR_SUCCESS; 978 } 979 980 /** 981 @brief Copy the pointer to a CeedQFunctionAssemblyData. 982 983 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 984 985 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 986 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 987 CeedQFunctionAssemblyData. 988 989 @param[in] data CeedQFunctionAssemblyData to copy reference to 990 @param[in,out] data_copy Variable to store copied reference 991 992 @return An error code: 0 - success, otherwise - failure 993 994 @ref Backend 995 **/ 996 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 997 CeedCall(CeedQFunctionAssemblyDataReference(data)); 998 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 999 *data_copy = data; 1000 return CEED_ERROR_SUCCESS; 1001 } 1002 1003 /** 1004 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1005 1006 @param[in] data CeedQFunctionAssemblyData to retrieve status 1007 @param[out] is_setup Boolean flag for setup status 1008 1009 @return An error code: 0 - success, otherwise - failure 1010 1011 @ref Backend 1012 **/ 1013 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1014 *is_setup = data->is_setup; 1015 return CEED_ERROR_SUCCESS; 1016 } 1017 1018 /** 1019 @brief Set internal objects for CeedQFunctionAssemblyData 1020 1021 @param[in,out] data CeedQFunctionAssemblyData to set objects 1022 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1023 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1024 1025 @return An error code: 0 - success, otherwise - failure 1026 1027 @ref Backend 1028 **/ 1029 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1030 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1031 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1032 1033 data->is_setup = true; 1034 return CEED_ERROR_SUCCESS; 1035 } 1036 1037 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1038 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1039 1040 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1041 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1042 1043 return CEED_ERROR_SUCCESS; 1044 } 1045 1046 /** 1047 @brief Destroy CeedQFunctionAssemblyData 1048 1049 @param[in,out] data CeedQFunctionAssemblyData to destroy 1050 1051 @return An error code: 0 - success, otherwise - failure 1052 1053 @ref Backend 1054 **/ 1055 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1056 if (!*data || --(*data)->ref_count > 0) { 1057 *data = NULL; 1058 return CEED_ERROR_SUCCESS; 1059 } 1060 CeedCall(CeedDestroy(&(*data)->ceed)); 1061 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1062 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1063 1064 CeedCall(CeedFree(data)); 1065 return CEED_ERROR_SUCCESS; 1066 } 1067 1068 /** 1069 @brief Get CeedOperatorAssemblyData 1070 1071 @param[in] op CeedOperator to assemble 1072 @param[out] data CeedQFunctionAssemblyData 1073 1074 @return An error code: 0 - success, otherwise - failure 1075 1076 @ref Backend 1077 **/ 1078 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1079 if (!op->op_assembled) { 1080 CeedOperatorAssemblyData data; 1081 1082 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1083 op->op_assembled = data; 1084 } 1085 *data = op->op_assembled; 1086 1087 return CEED_ERROR_SUCCESS; 1088 } 1089 1090 /** 1091 @brief Create object holding CeedOperator assembly data. 1092 1093 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1094 An array with references to the corresponding active CeedElemRestrictions is also stored. 1095 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1096 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1097 CeedEvalMode. 1098 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1099 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1100 1101 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1102 @param[in] op CeedOperator to be assembled 1103 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1104 1105 @return An error code: 0 - success, otherwise - failure 1106 1107 @ref Backend 1108 **/ 1109 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1110 CeedInt num_active_bases = 0; 1111 1112 // Allocate 1113 CeedCall(CeedCalloc(1, data)); 1114 (*data)->ceed = ceed; 1115 CeedCall(CeedReference(ceed)); 1116 1117 // Build OperatorAssembly data 1118 CeedQFunction qf; 1119 CeedQFunctionField *qf_fields; 1120 CeedOperatorField *op_fields; 1121 CeedInt num_input_fields; 1122 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1123 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1124 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1125 1126 // Determine active input basis 1127 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1128 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1129 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1130 for (CeedInt i = 0; i < num_input_fields; i++) { 1131 CeedVector vec; 1132 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1133 if (vec == CEED_VECTOR_ACTIVE) { 1134 CeedBasis basis_in = NULL; 1135 CeedEvalMode eval_mode; 1136 CeedInt index = -1, dim, num_comp, q_comp; 1137 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1138 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1139 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1140 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1141 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1142 for (CeedInt i = 0; i < num_active_bases; i++) { 1143 if ((*data)->active_bases[i] == basis_in) index = i; 1144 } 1145 if (index == -1) { 1146 CeedElemRestriction elem_rstr_in; 1147 index = num_active_bases; 1148 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1149 (*data)->active_bases[num_active_bases] = NULL; 1150 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1151 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1152 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1153 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1154 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1155 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1156 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1157 num_eval_modes_in[index] = 0; 1158 num_eval_modes_out[index] = 0; 1159 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1160 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1161 eval_modes_in[index] = NULL; 1162 eval_modes_out[index] = NULL; 1163 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1164 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1165 eval_mode_offsets_in[index] = NULL; 1166 eval_mode_offsets_out[index] = NULL; 1167 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1168 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1169 (*data)->assembled_bases_in[index] = NULL; 1170 (*data)->assembled_bases_out[index] = NULL; 1171 num_active_bases++; 1172 } 1173 if (eval_mode != CEED_EVAL_WEIGHT) { 1174 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1175 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1176 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1177 for (CeedInt d = 0; d < q_comp; d++) { 1178 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1179 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1180 offset += num_comp; 1181 } 1182 num_eval_modes_in[index] += q_comp; 1183 } 1184 } 1185 } 1186 (*data)->num_eval_modes_in = num_eval_modes_in; 1187 (*data)->eval_modes_in = eval_modes_in; 1188 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1189 1190 // Determine active output basis 1191 CeedInt num_output_fields; 1192 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1193 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1194 offset = 0; 1195 for (CeedInt i = 0; i < num_output_fields; i++) { 1196 CeedVector vec; 1197 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1198 if (vec == CEED_VECTOR_ACTIVE) { 1199 CeedBasis basis_out = NULL; 1200 CeedEvalMode eval_mode; 1201 CeedInt index = -1, dim, num_comp, q_comp; 1202 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1203 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1204 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1205 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1206 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1207 for (CeedInt i = 0; i < num_active_bases; i++) { 1208 if ((*data)->active_bases[i] == basis_out) index = i; 1209 } 1210 if (index == -1) { 1211 CeedElemRestriction elem_rstr_out; 1212 1213 index = num_active_bases; 1214 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1215 (*data)->active_bases[num_active_bases] = NULL; 1216 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1217 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1218 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1219 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1220 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1221 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1222 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1223 num_eval_modes_in[index] = 0; 1224 num_eval_modes_out[index] = 0; 1225 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1226 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1227 eval_modes_in[index] = NULL; 1228 eval_modes_out[index] = NULL; 1229 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1230 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1231 eval_mode_offsets_in[index] = NULL; 1232 eval_mode_offsets_out[index] = NULL; 1233 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1234 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1235 (*data)->assembled_bases_in[index] = NULL; 1236 (*data)->assembled_bases_out[index] = NULL; 1237 num_active_bases++; 1238 } 1239 if (eval_mode != CEED_EVAL_WEIGHT) { 1240 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1241 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1242 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1243 for (CeedInt d = 0; d < q_comp; d++) { 1244 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1245 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1246 offset += num_comp; 1247 } 1248 num_eval_modes_out[index] += q_comp; 1249 } 1250 } 1251 } 1252 (*data)->num_output_components = offset; 1253 (*data)->num_eval_modes_out = num_eval_modes_out; 1254 (*data)->eval_modes_out = eval_modes_out; 1255 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1256 (*data)->num_active_bases = num_active_bases; 1257 1258 return CEED_ERROR_SUCCESS; 1259 } 1260 1261 /** 1262 @brief Get CeedOperator CeedEvalModes for assembly. 1263 1264 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1265 1266 @param[in] data CeedOperatorAssemblyData 1267 @param[out] num_active_bases Total number of active bases 1268 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1269 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1270 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1271 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1272 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1273 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1274 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1275 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1276 including contributions of all active bases 1277 1278 @return An error code: 0 - success, otherwise - failure 1279 1280 1281 @ref Backend 1282 **/ 1283 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1284 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1285 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1286 if (num_active_bases) *num_active_bases = data->num_active_bases; 1287 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1288 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1289 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1290 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1291 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1292 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1293 if (num_output_components) *num_output_components = data->num_output_components; 1294 1295 return CEED_ERROR_SUCCESS; 1296 } 1297 1298 /** 1299 @brief Get CeedOperator CeedBasis data for assembly. 1300 1301 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1302 1303 @param[in] data CeedOperatorAssemblyData 1304 @param[out] num_active_bases Number of active bases, or NULL 1305 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1306 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1307 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1308 1309 @return An error code: 0 - success, otherwise - failure 1310 1311 @ref Backend 1312 **/ 1313 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1314 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1315 // Assemble B_in, B_out if needed 1316 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1317 CeedInt num_qpts; 1318 1319 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1320 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1321 CeedInt num_nodes; 1322 CeedScalar *B_in = NULL, *identity = NULL; 1323 bool has_eval_none = false; 1324 1325 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1326 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1327 1328 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1329 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1330 } 1331 if (has_eval_none) { 1332 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1333 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1334 identity[i * num_nodes + i] = 1.0; 1335 } 1336 } 1337 1338 for (CeedInt q = 0; q < num_qpts; q++) { 1339 for (CeedInt n = 0; n < num_nodes; n++) { 1340 CeedInt d_in = 0, q_comp_in; 1341 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1342 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1343 const CeedInt qq = data->num_eval_modes_in[b] * q; 1344 const CeedScalar *B = NULL; 1345 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1346 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1347 if (q_comp_in > 1) { 1348 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1349 else B = &B[(++d_in) * num_qpts * num_nodes]; 1350 } 1351 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1352 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1353 } 1354 } 1355 } 1356 if (identity) CeedCall(CeedFree(identity)); 1357 data->assembled_bases_in[b] = B_in; 1358 } 1359 } 1360 1361 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1362 CeedInt num_qpts; 1363 1364 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1365 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1366 CeedInt num_nodes; 1367 bool has_eval_none = false; 1368 CeedScalar *B_out = NULL, *identity = NULL; 1369 1370 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1371 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1372 1373 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1374 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1375 } 1376 if (has_eval_none) { 1377 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1378 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1379 identity[i * num_nodes + i] = 1.0; 1380 } 1381 } 1382 1383 for (CeedInt q = 0; q < num_qpts; q++) { 1384 for (CeedInt n = 0; n < num_nodes; n++) { 1385 CeedInt d_out = 0, q_comp_out; 1386 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1387 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1388 const CeedInt qq = data->num_eval_modes_out[b] * q; 1389 const CeedScalar *B = NULL; 1390 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1391 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1392 if (q_comp_out > 1) { 1393 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1394 else B = &B[(++d_out) * num_qpts * num_nodes]; 1395 } 1396 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1397 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1398 } 1399 } 1400 } 1401 if (identity) CeedCall(CeedFree(identity)); 1402 data->assembled_bases_out[b] = B_out; 1403 } 1404 } 1405 1406 // Pass out assembled data 1407 if (active_bases) *active_bases = data->active_bases; 1408 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1409 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1410 1411 return CEED_ERROR_SUCCESS; 1412 } 1413 1414 /** 1415 @brief Get CeedOperator CeedBasis data for assembly. 1416 1417 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1418 1419 @param[in] data CeedOperatorAssemblyData 1420 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1421 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1422 1423 @return An error code: 0 - success, otherwise - failure 1424 1425 @ref Backend 1426 **/ 1427 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1428 CeedElemRestriction **active_elem_rstrs) { 1429 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1430 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1431 1432 return CEED_ERROR_SUCCESS; 1433 } 1434 1435 /** 1436 @brief Destroy CeedOperatorAssemblyData 1437 1438 @param[in,out] data CeedOperatorAssemblyData to destroy 1439 1440 @return An error code: 0 - success, otherwise - failure 1441 1442 @ref Backend 1443 **/ 1444 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1445 if (!*data) { 1446 *data = NULL; 1447 return CEED_ERROR_SUCCESS; 1448 } 1449 CeedCall(CeedDestroy(&(*data)->ceed)); 1450 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1451 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1452 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1453 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1454 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1455 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1456 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1457 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1458 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1459 } 1460 CeedCall(CeedFree(&(*data)->active_bases)); 1461 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1462 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1463 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1464 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1465 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1466 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1467 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1468 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1469 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1470 1471 CeedCall(CeedFree(data)); 1472 return CEED_ERROR_SUCCESS; 1473 } 1474 1475 /// @} 1476 1477 /// ---------------------------------------------------------------------------- 1478 /// CeedOperator Public API 1479 /// ---------------------------------------------------------------------------- 1480 /// @addtogroup CeedOperatorUser 1481 /// @{ 1482 1483 /** 1484 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1485 1486 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1487 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1488 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1489 1490 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1491 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1492 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1493 1494 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1495 1496 @param[in] op CeedOperator to assemble CeedQFunction 1497 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1498 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1499 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1500 1501 @return An error code: 0 - success, otherwise - failure 1502 1503 @ref User 1504 **/ 1505 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1506 CeedCall(CeedOperatorCheckReady(op)); 1507 1508 if (op->LinearAssembleQFunction) { 1509 // Backend version 1510 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1511 } else { 1512 // Operator fallback 1513 CeedOperator op_fallback; 1514 1515 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1516 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1517 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1518 } 1519 return CEED_ERROR_SUCCESS; 1520 } 1521 1522 /** 1523 @brief Assemble CeedQFunction and store result internally. 1524 1525 Return copied references of stored data to the caller. 1526 Caller is responsible for ownership and destruction of the copied references. 1527 See also @ref CeedOperatorLinearAssembleQFunction 1528 1529 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1530 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1531 1532 @param[in] op CeedOperator to assemble CeedQFunction 1533 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1534 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1535 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1536 1537 @return An error code: 0 - success, otherwise - failure 1538 1539 @ref User 1540 **/ 1541 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1542 CeedCall(CeedOperatorCheckReady(op)); 1543 1544 if (op->LinearAssembleQFunctionUpdate) { 1545 // Backend version 1546 bool qf_assembled_is_setup; 1547 CeedVector assembled_vec = NULL; 1548 CeedElemRestriction assembled_rstr = NULL; 1549 1550 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1551 if (qf_assembled_is_setup) { 1552 bool update_needed; 1553 1554 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1555 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1556 if (update_needed) CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1557 } else { 1558 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1559 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1560 } 1561 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1562 1563 // Copy reference from internally held copy 1564 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1565 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1566 CeedCall(CeedVectorDestroy(&assembled_vec)); 1567 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1568 } else { 1569 // Operator fallback 1570 CeedOperator op_fallback; 1571 1572 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1573 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1574 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1575 } 1576 1577 return CEED_ERROR_SUCCESS; 1578 } 1579 1580 /** 1581 @brief Assemble the diagonal of a square linear CeedOperator 1582 1583 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1584 1585 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1586 1587 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1588 1589 @param[in] op CeedOperator to assemble CeedQFunction 1590 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1591 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1592 1593 @return An error code: 0 - success, otherwise - failure 1594 1595 @ref User 1596 **/ 1597 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1598 bool is_composite; 1599 CeedCall(CeedOperatorCheckReady(op)); 1600 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1601 1602 CeedSize input_size = 0, output_size = 0; 1603 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1604 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1605 1606 // Early exit for empty operator 1607 if (!is_composite) { 1608 CeedInt num_elem = 0; 1609 1610 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1611 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1612 } 1613 1614 if (op->LinearAssembleDiagonal) { 1615 // Backend version 1616 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1617 return CEED_ERROR_SUCCESS; 1618 } else if (op->LinearAssembleAddDiagonal) { 1619 // Backend version with zeroing first 1620 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1621 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1622 return CEED_ERROR_SUCCESS; 1623 } else { 1624 // Operator fallback 1625 CeedOperator op_fallback; 1626 1627 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1628 if (op_fallback) { 1629 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1630 return CEED_ERROR_SUCCESS; 1631 } 1632 } 1633 // Default interface implementation 1634 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1635 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1636 1637 return CEED_ERROR_SUCCESS; 1638 } 1639 1640 /** 1641 @brief Assemble the diagonal of a square linear CeedOperator 1642 1643 This sums into a CeedVector the diagonal of a linear CeedOperator. 1644 1645 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1646 1647 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1648 1649 @param[in] op CeedOperator to assemble CeedQFunction 1650 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1651 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1652 1653 @return An error code: 0 - success, otherwise - failure 1654 1655 @ref User 1656 **/ 1657 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1658 bool is_composite; 1659 CeedCall(CeedOperatorCheckReady(op)); 1660 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1661 1662 CeedSize input_size = 0, output_size = 0; 1663 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1664 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1665 1666 // Early exit for empty operator 1667 if (!is_composite) { 1668 CeedInt num_elem = 0; 1669 1670 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1671 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1672 } 1673 1674 if (op->LinearAssembleAddDiagonal) { 1675 // Backend version 1676 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1677 return CEED_ERROR_SUCCESS; 1678 } else { 1679 // Operator fallback 1680 CeedOperator op_fallback; 1681 1682 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1683 if (op_fallback) { 1684 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1685 return CEED_ERROR_SUCCESS; 1686 } 1687 } 1688 // Default interface implementation 1689 if (is_composite) { 1690 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1691 } else { 1692 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1693 } 1694 1695 return CEED_ERROR_SUCCESS; 1696 } 1697 1698 /** 1699 @brief Assemble the point block diagonal of a square linear CeedOperator 1700 1701 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1702 1703 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1704 1705 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1706 1707 @param[in] op CeedOperator to assemble CeedQFunction 1708 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1709 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1710 component in]. 1711 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1712 1713 @return An error code: 0 - success, otherwise - failure 1714 1715 @ref User 1716 **/ 1717 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1718 bool is_composite; 1719 CeedCall(CeedOperatorCheckReady(op)); 1720 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1721 1722 CeedSize input_size = 0, output_size = 0; 1723 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1724 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1725 1726 // Early exit for empty operator 1727 if (!is_composite) { 1728 CeedInt num_elem = 0; 1729 1730 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1731 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1732 } 1733 1734 if (op->LinearAssemblePointBlockDiagonal) { 1735 // Backend version 1736 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1737 return CEED_ERROR_SUCCESS; 1738 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1739 // Backend version with zeroing first 1740 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1741 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1742 return CEED_ERROR_SUCCESS; 1743 } else { 1744 // Operator fallback 1745 CeedOperator op_fallback; 1746 1747 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1748 if (op_fallback) { 1749 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1750 return CEED_ERROR_SUCCESS; 1751 } 1752 } 1753 // Default interface implementation 1754 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1755 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1756 1757 return CEED_ERROR_SUCCESS; 1758 } 1759 1760 /** 1761 @brief Assemble the point block diagonal of a square linear CeedOperator 1762 1763 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1764 1765 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1766 1767 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1768 1769 @param[in] op CeedOperator to assemble CeedQFunction 1770 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1771 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1772 component in]. 1773 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1774 1775 @return An error code: 0 - success, otherwise - failure 1776 1777 @ref User 1778 **/ 1779 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1780 bool is_composite; 1781 CeedCall(CeedOperatorCheckReady(op)); 1782 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1783 1784 CeedSize input_size = 0, output_size = 0; 1785 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1786 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1787 1788 // Early exit for empty operator 1789 if (!is_composite) { 1790 CeedInt num_elem = 0; 1791 1792 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1793 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1794 } 1795 1796 if (op->LinearAssembleAddPointBlockDiagonal) { 1797 // Backend version 1798 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1799 return CEED_ERROR_SUCCESS; 1800 } else { 1801 // Operator fallback 1802 CeedOperator op_fallback; 1803 1804 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1805 if (op_fallback) { 1806 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1807 return CEED_ERROR_SUCCESS; 1808 } 1809 } 1810 // Default interface implementation 1811 if (is_composite) { 1812 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1813 } else { 1814 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1815 } 1816 1817 return CEED_ERROR_SUCCESS; 1818 } 1819 1820 /** 1821 @brief Fully assemble the nonzero pattern of a linear operator. 1822 1823 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1824 1825 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1826 matrix in entry (i, j). 1827 Note that the (i, j) pairs are not unique and may repeat. 1828 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1829 1830 This will generally be slow unless your operator is low-order. 1831 1832 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1833 1834 @param[in] op CeedOperator to assemble 1835 @param[out] num_entries Number of entries in coordinate nonzero pattern 1836 @param[out] rows Row number for each entry 1837 @param[out] cols Column number for each entry 1838 1839 @ref User 1840 **/ 1841 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1842 CeedInt num_suboperators, single_entries; 1843 CeedOperator *sub_operators; 1844 bool is_composite; 1845 CeedCall(CeedOperatorCheckReady(op)); 1846 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1847 1848 if (op->LinearAssembleSymbolic) { 1849 // Backend version 1850 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1851 return CEED_ERROR_SUCCESS; 1852 } else { 1853 // Operator fallback 1854 CeedOperator op_fallback; 1855 1856 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1857 if (op_fallback) { 1858 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1859 return CEED_ERROR_SUCCESS; 1860 } 1861 } 1862 1863 // Default interface implementation 1864 1865 // count entries and allocate rows, cols arrays 1866 *num_entries = 0; 1867 if (is_composite) { 1868 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1869 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1870 for (CeedInt k = 0; k < num_suboperators; ++k) { 1871 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1872 *num_entries += single_entries; 1873 } 1874 } else { 1875 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1876 *num_entries += single_entries; 1877 } 1878 CeedCall(CeedCalloc(*num_entries, rows)); 1879 CeedCall(CeedCalloc(*num_entries, cols)); 1880 1881 // assemble nonzero locations 1882 CeedInt offset = 0; 1883 if (is_composite) { 1884 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1885 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1886 for (CeedInt k = 0; k < num_suboperators; ++k) { 1887 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1888 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1889 offset += single_entries; 1890 } 1891 } else { 1892 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1893 } 1894 1895 return CEED_ERROR_SUCCESS; 1896 } 1897 1898 /** 1899 @brief Fully assemble the nonzero entries of a linear operator. 1900 1901 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1902 1903 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1904 matrix in entry (i, j). 1905 Note that the (i, j) pairs are not unique and may repeat. 1906 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1907 1908 This will generally be slow unless your operator is low-order. 1909 1910 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1911 1912 @param[in] op CeedOperator to assemble 1913 @param[out] values Values to assemble into matrix 1914 1915 @ref User 1916 **/ 1917 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1918 CeedInt num_suboperators, single_entries = 0; 1919 CeedOperator *sub_operators; 1920 bool is_composite; 1921 CeedCall(CeedOperatorCheckReady(op)); 1922 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1923 1924 // Early exit for empty operator 1925 if (!is_composite) { 1926 CeedInt num_elem = 0; 1927 1928 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1929 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1930 } 1931 1932 if (op->LinearAssemble) { 1933 // Backend version 1934 CeedCall(op->LinearAssemble(op, values)); 1935 return CEED_ERROR_SUCCESS; 1936 } else { 1937 // Operator fallback 1938 CeedOperator op_fallback; 1939 1940 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1941 if (op_fallback) { 1942 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 1943 return CEED_ERROR_SUCCESS; 1944 } 1945 } 1946 1947 // Default interface implementation 1948 CeedInt offset = 0; 1949 CeedCall(CeedVectorSetValue(values, 0.0)); 1950 if (is_composite) { 1951 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1952 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1953 for (CeedInt k = 0; k < num_suboperators; k++) { 1954 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 1955 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1956 offset += single_entries; 1957 } 1958 } else { 1959 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 1960 } 1961 1962 return CEED_ERROR_SUCCESS; 1963 } 1964 1965 /** 1966 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 1967 1968 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1969 1970 @param[in] op Composite CeedOperator 1971 @param[in] num_skip_indices Number of suboperators to skip 1972 @param[in] skip_indices Array of indices of suboperators to skip 1973 @param[out] mult Vector to store multiplicity (of size l_size) 1974 1975 @return An error code: 0 - success, otherwise - failure 1976 1977 @ref User 1978 **/ 1979 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 1980 CeedCall(CeedOperatorCheckReady(op)); 1981 1982 Ceed ceed; 1983 CeedInt num_suboperators; 1984 CeedSize l_vec_len; 1985 CeedScalar *mult_array; 1986 CeedVector ones_l_vec; 1987 CeedElemRestriction elem_rstr; 1988 CeedOperator *sub_operators; 1989 1990 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1991 1992 // Zero mult vector 1993 CeedCall(CeedVectorSetValue(mult, 0.0)); 1994 1995 // Get suboperators 1996 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1997 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1998 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 1999 2000 // Work vector 2001 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2002 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2003 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2004 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2005 2006 // Compute multiplicity across suboperators 2007 for (CeedInt i = 0; i < num_suboperators; i++) { 2008 const CeedScalar *sub_mult_array; 2009 CeedVector sub_mult_l_vec, ones_e_vec; 2010 2011 // -- Check for suboperator to skip 2012 for (CeedInt j = 0; j < num_skip_indices; j++) { 2013 if (skip_indices[j] == i) continue; 2014 } 2015 2016 // -- Sub operator multiplicity 2017 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2018 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2019 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2020 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2021 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2022 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2023 // ---- Flag every node present in the current suboperator 2024 for (CeedInt j = 0; j < l_vec_len; j++) { 2025 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2026 } 2027 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2028 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2029 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2030 } 2031 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2032 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2033 2034 return CEED_ERROR_SUCCESS; 2035 } 2036 2037 /** 2038 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2039 grid interpolation 2040 2041 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2042 2043 @param[in] op_fine Fine grid operator 2044 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2045 @param[in] rstr_coarse Coarse grid restriction 2046 @param[in] basis_coarse Coarse grid active vector basis 2047 @param[out] op_coarse Coarse grid operator 2048 @param[out] op_prolong Coarse to fine operator, or NULL 2049 @param[out] op_restrict Fine to coarse operator, or NULL 2050 2051 @return An error code: 0 - success, otherwise - failure 2052 2053 @ref User 2054 **/ 2055 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2056 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2057 CeedCall(CeedOperatorCheckReady(op_fine)); 2058 2059 // Build prolongation matrix, if required 2060 CeedBasis basis_c_to_f = NULL; 2061 if (op_prolong || op_restrict) { 2062 CeedBasis basis_fine; 2063 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2064 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2065 } 2066 2067 // Core code 2068 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2069 2070 return CEED_ERROR_SUCCESS; 2071 } 2072 2073 /** 2074 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2075 2076 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2077 2078 @param[in] op_fine Fine grid operator 2079 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2080 @param[in] rstr_coarse Coarse grid restriction 2081 @param[in] basis_coarse Coarse grid active vector basis 2082 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2083 @param[out] op_coarse Coarse grid operator 2084 @param[out] op_prolong Coarse to fine operator, or NULL 2085 @param[out] op_restrict Fine to coarse operator, or NULL 2086 2087 @return An error code: 0 - success, otherwise - failure 2088 2089 @ref User 2090 **/ 2091 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2092 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2093 CeedOperator *op_restrict) { 2094 CeedCall(CeedOperatorCheckReady(op_fine)); 2095 Ceed ceed; 2096 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2097 2098 // Check for compatible quadrature spaces 2099 CeedBasis basis_fine; 2100 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2101 CeedInt Q_f, Q_c; 2102 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2103 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2104 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2105 2106 // Create coarse to fine basis, if required 2107 CeedBasis basis_c_to_f = NULL; 2108 if (op_prolong || op_restrict) { 2109 // Check if interpolation matrix is provided 2110 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2111 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2112 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2113 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2114 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2115 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2116 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2117 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2118 CeedScalar *q_ref, *q_weight, *grad; 2119 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2120 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2121 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2122 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2123 CeedCall(CeedFree(&q_ref)); 2124 CeedCall(CeedFree(&q_weight)); 2125 CeedCall(CeedFree(&grad)); 2126 } 2127 2128 // Core code 2129 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2130 return CEED_ERROR_SUCCESS; 2131 } 2132 2133 /** 2134 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2135 2136 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2137 2138 @param[in] op_fine Fine grid operator 2139 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2140 @param[in] rstr_coarse Coarse grid restriction 2141 @param[in] basis_coarse Coarse grid active vector basis 2142 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2143 @param[out] op_coarse Coarse grid operator 2144 @param[out] op_prolong Coarse to fine operator, or NULL 2145 @param[out] op_restrict Fine to coarse operator, or NULL 2146 2147 @return An error code: 0 - success, otherwise - failure 2148 2149 @ref User 2150 **/ 2151 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2152 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2153 CeedOperator *op_restrict) { 2154 CeedCall(CeedOperatorCheckReady(op_fine)); 2155 Ceed ceed; 2156 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2157 2158 // Check for compatible quadrature spaces 2159 CeedBasis basis_fine; 2160 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2161 CeedInt Q_f, Q_c; 2162 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2163 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2164 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2165 2166 // Coarse to fine basis 2167 CeedBasis basis_c_to_f = NULL; 2168 if (op_prolong || op_restrict) { 2169 // Check if interpolation matrix is provided 2170 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2171 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2172 CeedElemTopology topo; 2173 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2174 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2175 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2176 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2177 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2178 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2179 CeedScalar *q_ref, *q_weight, *grad; 2180 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2181 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2182 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2183 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2184 CeedCall(CeedFree(&q_ref)); 2185 CeedCall(CeedFree(&q_weight)); 2186 CeedCall(CeedFree(&grad)); 2187 } 2188 2189 // Core code 2190 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2191 return CEED_ERROR_SUCCESS; 2192 } 2193 2194 /** 2195 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2196 2197 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2198 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2199 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2200 \hat S V\f$. 2201 The CeedOperator must be linear and non-composite. 2202 The associated CeedQFunction must therefore also be linear. 2203 2204 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2205 2206 @param[in] op CeedOperator to create element inverses 2207 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2208 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2209 2210 @return An error code: 0 - success, otherwise - failure 2211 2212 @ref User 2213 **/ 2214 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2215 CeedCall(CeedOperatorCheckReady(op)); 2216 2217 if (op->CreateFDMElementInverse) { 2218 // Backend version 2219 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2220 return CEED_ERROR_SUCCESS; 2221 } else { 2222 // Operator fallback 2223 CeedOperator op_fallback; 2224 2225 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2226 if (op_fallback) { 2227 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2228 return CEED_ERROR_SUCCESS; 2229 } 2230 } 2231 2232 // Default interface implementation 2233 Ceed ceed, ceed_parent; 2234 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2235 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2236 ceed_parent = ceed_parent ? ceed_parent : ceed; 2237 CeedQFunction qf; 2238 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2239 2240 // Determine active input basis 2241 bool interp = false, grad = false; 2242 CeedBasis basis = NULL; 2243 CeedElemRestriction rstr = NULL; 2244 CeedOperatorField *op_fields; 2245 CeedQFunctionField *qf_fields; 2246 CeedInt num_input_fields; 2247 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2248 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2249 for (CeedInt i = 0; i < num_input_fields; i++) { 2250 CeedVector vec; 2251 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2252 if (vec == CEED_VECTOR_ACTIVE) { 2253 CeedEvalMode eval_mode; 2254 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2255 interp = interp || eval_mode == CEED_EVAL_INTERP; 2256 grad = grad || eval_mode == CEED_EVAL_GRAD; 2257 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2258 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2259 } 2260 } 2261 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2262 CeedSize l_size = 1; 2263 CeedInt P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2264 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2265 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2266 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2267 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2268 CeedCall(CeedBasisGetDimension(basis, &dim)); 2269 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2270 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2271 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2272 2273 // Build and diagonalize 1D Mass and Laplacian 2274 bool is_tensor_basis; 2275 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2276 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2277 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2278 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2279 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2280 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2281 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2282 CeedCall(CeedCalloc(P_1d, &lambda)); 2283 // -- Build matrices 2284 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2285 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2286 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2287 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2288 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2289 2290 // -- Diagonalize 2291 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2292 CeedCall(CeedFree(&mass)); 2293 CeedCall(CeedFree(&laplace)); 2294 for (CeedInt i = 0; i < P_1d; i++) { 2295 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2296 } 2297 CeedCall(CeedFree(&x)); 2298 2299 // Assemble QFunction 2300 CeedVector assembled = NULL; 2301 CeedElemRestriction rstr_qf = NULL; 2302 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2303 CeedInt layout[3]; 2304 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2305 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2306 CeedScalar max_norm = 0; 2307 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2308 2309 // Calculate element averages 2310 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2311 CeedScalar *elem_avg; 2312 const CeedScalar *assembled_array, *q_weight_array; 2313 CeedVector q_weight; 2314 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2315 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2316 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2317 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2318 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2319 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2320 for (CeedInt e = 0; e < num_elem; e++) { 2321 CeedInt count = 0; 2322 for (CeedInt q = 0; q < num_qpts; q++) { 2323 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2324 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2325 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2326 count++; 2327 } 2328 } 2329 } 2330 if (count) { 2331 elem_avg[e] /= count; 2332 } else { 2333 elem_avg[e] = 1.0; 2334 } 2335 } 2336 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2337 CeedCall(CeedVectorDestroy(&assembled)); 2338 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2339 CeedCall(CeedVectorDestroy(&q_weight)); 2340 2341 // Build FDM diagonal 2342 CeedVector q_data; 2343 CeedScalar *q_data_array, *fdm_diagonal; 2344 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2345 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2346 for (CeedInt c = 0; c < num_comp; c++) { 2347 for (CeedInt n = 0; n < num_nodes; n++) { 2348 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2349 if (grad) { 2350 for (CeedInt d = 0; d < dim; d++) { 2351 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2352 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2353 } 2354 } 2355 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2356 } 2357 } 2358 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2359 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2360 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2361 for (CeedInt e = 0; e < num_elem; e++) { 2362 for (CeedInt c = 0; c < num_comp; c++) { 2363 for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2364 } 2365 } 2366 CeedCall(CeedFree(&elem_avg)); 2367 CeedCall(CeedFree(&fdm_diagonal)); 2368 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2369 2370 // Setup FDM operator 2371 // -- Basis 2372 CeedBasis fdm_basis; 2373 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2374 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2375 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2376 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2377 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2378 CeedCall(CeedFree(&fdm_interp)); 2379 CeedCall(CeedFree(&grad_dummy)); 2380 CeedCall(CeedFree(&q_ref_dummy)); 2381 CeedCall(CeedFree(&q_weight_dummy)); 2382 CeedCall(CeedFree(&lambda)); 2383 2384 // -- Restriction 2385 CeedElemRestriction rstr_qd_i; 2386 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2387 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2388 // -- QFunction 2389 CeedQFunction qf_fdm; 2390 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2391 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2392 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2393 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2394 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2395 // -- QFunction context 2396 CeedInt *num_comp_data; 2397 CeedCall(CeedCalloc(1, &num_comp_data)); 2398 num_comp_data[0] = num_comp; 2399 CeedQFunctionContext ctx_fdm; 2400 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2401 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2402 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2403 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2404 // -- Operator 2405 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2406 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2407 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2408 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2409 2410 // Cleanup 2411 CeedCall(CeedVectorDestroy(&q_data)); 2412 CeedCall(CeedBasisDestroy(&fdm_basis)); 2413 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2414 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2415 2416 return CEED_ERROR_SUCCESS; 2417 } 2418 2419 /// @} 2420