1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = NULL; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 128 // Cleanup 129 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 130 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 131 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 132 } 133 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 134 CeedCall(CeedOperatorCheckReady(op_fallback)); 135 op->op_fallback = op_fallback; 136 137 return CEED_ERROR_SUCCESS; 138 } 139 140 /** 141 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 142 143 @param[in] op CeedOperator to retrieve fallback for 144 @param[out] op_fallback Fallback CeedOperator 145 146 @return An error code: 0 - success, otherwise - failure 147 148 @ref Developer 149 **/ 150 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 151 // Create if needed 152 if (!op->op_fallback) { 153 CeedCall(CeedOperatorCreateFallback(op)); 154 } 155 if (op->op_fallback) { 156 bool is_debug; 157 158 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 159 if (is_debug) { 160 Ceed ceed, ceed_fallback; 161 const char *resource, *resource_fallback; 162 163 CeedCall(CeedOperatorGetCeed(op, &ceed)); 164 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 165 CeedCall(CeedGetResource(ceed, &resource)); 166 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 167 168 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 169 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 170 op->op_fallback); 171 } 172 } 173 *op_fallback = op->op_fallback; 174 175 return CEED_ERROR_SUCCESS; 176 } 177 178 /** 179 @brief Select correct basis matrix pointer based on CeedEvalMode 180 181 @param[in] basis CeedBasis from which to get the basis matrix 182 @param[in] eval_mode Current basis evaluation mode 183 @param[in] identity Pointer to identity matrix 184 @param[out] basis_ptr Basis pointer to set 185 186 @ref Developer 187 **/ 188 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 189 switch (eval_mode) { 190 case CEED_EVAL_NONE: 191 *basis_ptr = identity; 192 break; 193 case CEED_EVAL_INTERP: 194 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 195 break; 196 case CEED_EVAL_GRAD: 197 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 198 break; 199 case CEED_EVAL_DIV: 200 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 201 break; 202 case CEED_EVAL_CURL: 203 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 204 break; 205 case CEED_EVAL_WEIGHT: 206 break; // Caught by QF Assembly 207 } 208 assert(*basis_ptr != NULL); 209 210 return CEED_ERROR_SUCCESS; 211 } 212 213 /** 214 @brief Create point block restriction for active operator field 215 216 @param[in] rstr Original CeedElemRestriction for active field 217 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 218 219 @return An error code: 0 - success, otherwise - failure 220 221 @ref Developer 222 **/ 223 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 224 Ceed ceed; 225 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 226 const CeedInt *offsets; 227 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 228 229 // Expand offsets 230 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 231 CeedSize l_size; 232 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 233 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 234 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 235 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 236 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 237 CeedInt shift = num_comp; 238 if (comp_stride != 1) shift *= num_comp; 239 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 240 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 241 pointblock_offsets[i] = offsets[i] * shift; 242 } 243 244 // Create new restriction 245 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 246 pointblock_offsets, pointblock_rstr)); 247 248 // Cleanup 249 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 250 251 return CEED_ERROR_SUCCESS; 252 } 253 254 /** 255 @brief Core logic for assembling operator diagonal or point block diagonal 256 257 @param[in] op CeedOperator to assemble point block diagonal 258 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 259 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 260 @param[out] assembled CeedVector to store assembled diagonal 261 262 @return An error code: 0 - success, otherwise - failure 263 264 @ref Developer 265 **/ 266 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 267 Ceed ceed; 268 CeedCall(CeedOperatorGetCeed(op, &ceed)); 269 270 // Assemble QFunction 271 CeedQFunction qf; 272 const CeedScalar *assembled_qf_array; 273 CeedVector assembled_qf = NULL; 274 CeedElemRestriction assembled_elem_rstr = NULL; 275 CeedInt num_input_fields, num_output_fields; 276 CeedInt layout[3]; 277 278 CeedCall(CeedOperatorGetQFunction(op, &qf)); 279 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 280 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 281 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 282 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 283 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 284 285 // Get assembly data 286 CeedOperatorAssemblyData data; 287 const CeedEvalMode **eval_modes_in, **eval_modes_out; 288 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 289 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 290 CeedBasis *active_bases; 291 CeedElemRestriction *active_elem_rstrs; 292 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 293 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 294 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 295 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 296 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 297 298 // Loop over all active bases 299 for (CeedInt b = 0; b < num_active_bases; b++) { 300 // Assemble point block diagonal restriction, if needed 301 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 302 303 if (is_pointblock) { 304 CeedElemRestriction point_block_elem_rstr; 305 306 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 307 diag_elem_rstr = point_block_elem_rstr; 308 } 309 310 // Create diagonal vector 311 CeedVector elem_diag; 312 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 313 314 // Assemble element operator diagonals 315 CeedScalar *elem_diag_array; 316 CeedInt num_elem, num_nodes, num_qpts, num_components; 317 318 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 319 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 320 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 321 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 322 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 323 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 324 325 // Construct identity matrix for basis if required 326 bool has_eval_none = false; 327 CeedScalar *identity = NULL; 328 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 329 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 330 } 331 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 332 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 333 } 334 if (has_eval_none) { 335 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 336 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 337 } 338 339 // Compute the diagonal of B^T D B 340 // Each element 341 for (CeedSize e = 0; e < num_elem; e++) { 342 // Each basis eval mode pair 343 CeedInt d_out = 0, q_comp_out; 344 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 345 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 346 const CeedScalar *B_t = NULL; 347 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t); 348 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out)); 349 if (q_comp_out > 1) { 350 if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 351 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 352 } 353 eval_mode_out_prev = eval_modes_out[b][e_out]; 354 355 CeedInt d_in = 0, q_comp_in; 356 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 357 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 358 const CeedScalar *B = NULL; 359 CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B); 360 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in)); 361 if (q_comp_in > 1) { 362 if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 363 else B = &B[(++d_in) * num_qpts * num_nodes]; 364 } 365 eval_mode_in_prev = eval_modes_in[b][e_in]; 366 367 // Each component 368 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 369 // Each qpt/node pair 370 for (CeedInt q = 0; q < num_qpts; q++) { 371 if (is_pointblock) { 372 // Point Block Diagonal 373 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 374 const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 375 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 376 for (CeedInt n = 0; n < num_nodes; n++) { 377 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 378 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 379 } 380 } 381 } else { 382 // Diagonal Only 383 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 384 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 385 for (CeedInt n = 0; n < num_nodes; n++) { 386 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 387 } 388 } 389 } 390 } 391 } 392 } 393 } 394 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 395 396 // Assemble local operator diagonal 397 CeedCall(CeedElemRestrictionApplyUnsigned(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 398 399 // Cleanup 400 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 401 CeedCall(CeedVectorDestroy(&elem_diag)); 402 CeedCall(CeedFree(&identity)); 403 } 404 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 405 CeedCall(CeedVectorDestroy(&assembled_qf)); 406 407 return CEED_ERROR_SUCCESS; 408 } 409 410 /** 411 @brief Core logic for assembling composite operator diagonal 412 413 @param[in] op CeedOperator to assemble point block diagonal 414 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 415 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 416 @param[out] assembled CeedVector to store assembled diagonal 417 418 @return An error code: 0 - success, otherwise - failure 419 420 @ref Developer 421 **/ 422 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 423 CeedVector assembled) { 424 CeedInt num_sub; 425 CeedOperator *suboperators; 426 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 427 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 428 for (CeedInt i = 0; i < num_sub; i++) { 429 if (is_pointblock) { 430 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 431 } else { 432 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 433 } 434 } 435 return CEED_ERROR_SUCCESS; 436 } 437 438 /** 439 @brief Build nonzero pattern for non-composite operator 440 441 Users should generally use CeedOperatorLinearAssembleSymbolic() 442 443 Note: For operators using oriented element restrictions, entries in rows or cols may be negative indicating the assembled value at this nonzero 444 should be negated 445 446 @param[in] op CeedOperator to assemble nonzero pattern 447 @param[in] offset Offset for number of entries 448 @param[out] rows Row number for each entry 449 @param[out] cols Column number for each entry 450 451 @return An error code: 0 - success, otherwise - failure 452 453 @ref Developer 454 **/ 455 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 456 Ceed ceed; 457 bool is_composite; 458 CeedCall(CeedOperatorGetCeed(op, &ceed)); 459 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 460 461 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 462 463 CeedSize num_nodes; 464 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 465 CeedElemRestriction rstr_in; 466 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 467 CeedInt num_elem, elem_size, num_comp; 468 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 469 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 470 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 471 CeedInt layout_er[3]; 472 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 473 474 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 475 476 // Determine elem_dof relation 477 CeedVector index_vec; 478 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 479 CeedScalar *array; 480 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 481 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 482 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 483 CeedVector elem_dof; 484 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 485 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 486 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 487 const CeedScalar *elem_dof_a; 488 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 489 CeedCall(CeedVectorDestroy(&index_vec)); 490 491 // Determine i, j locations for element matrices 492 CeedSize count = 0; 493 for (CeedInt e = 0; e < num_elem; e++) { 494 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 495 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 496 for (CeedInt i = 0; i < elem_size; i++) { 497 for (CeedInt j = 0; j < elem_size; j++) { 498 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 499 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 500 501 const CeedInt row = elem_dof_a[elem_dof_index_row]; 502 const CeedInt col = elem_dof_a[elem_dof_index_col]; 503 504 rows[offset + count] = row; 505 cols[offset + count] = col; 506 count++; 507 } 508 } 509 } 510 } 511 } 512 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 513 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 514 CeedCall(CeedVectorDestroy(&elem_dof)); 515 516 return CEED_ERROR_SUCCESS; 517 } 518 519 /** 520 @brief Assemble nonzero entries for non-composite operator 521 522 Users should generally use CeedOperatorLinearAssemble() 523 524 @param[in] op CeedOperator to assemble 525 @param[in] offset Offset for number of entries 526 @param[out] values Values to assemble into matrix 527 528 @return An error code: 0 - success, otherwise - failure 529 530 @ref Developer 531 **/ 532 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 533 Ceed ceed; 534 bool is_composite; 535 CeedCall(CeedOperatorGetCeed(op, &ceed)); 536 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 537 538 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 539 540 // Early exit for empty operator 541 { 542 CeedInt num_elem = 0; 543 544 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 545 if (num_elem == 0) return CEED_ERROR_SUCCESS; 546 } 547 548 if (op->LinearAssembleSingle) { 549 // Backend version 550 CeedCall(op->LinearAssembleSingle(op, offset, values)); 551 return CEED_ERROR_SUCCESS; 552 } else { 553 // Operator fallback 554 CeedOperator op_fallback; 555 556 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 557 if (op_fallback) { 558 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 559 return CEED_ERROR_SUCCESS; 560 } 561 } 562 563 // Assemble QFunction 564 CeedQFunction qf; 565 CeedCall(CeedOperatorGetQFunction(op, &qf)); 566 CeedVector assembled_qf = NULL; 567 CeedElemRestriction rstr_q = NULL; 568 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 569 CeedSize qf_length; 570 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 571 572 CeedInt num_input_fields, num_output_fields; 573 CeedOperatorField *input_fields; 574 CeedOperatorField *output_fields; 575 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 576 577 // Get assembly data 578 CeedOperatorAssemblyData data; 579 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 580 const CeedEvalMode **eval_modes_in, **eval_modes_out; 581 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 582 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 583 &eval_modes_out, NULL, NULL)); 584 CeedBasis *bases; 585 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 586 CeedBasis basis_in = bases[0]; 587 588 CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 589 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 590 591 CeedElemRestriction active_rstr; 592 CeedInt num_elem, elem_size, num_qpts, num_comp; 593 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 594 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 595 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 596 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 597 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 598 599 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 600 601 // loop over elements and put in data structure 602 const CeedScalar *assembled_qf_array; 603 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 604 605 CeedInt layout_qf[3]; 606 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 607 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 608 609 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 610 const CeedScalar **B_mats_in, **B_mats_out; 611 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 612 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 613 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 614 CeedScalar elem_mat[elem_size * elem_size]; 615 CeedSize count = 0; 616 CeedScalar *vals; 617 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 618 for (CeedSize e = 0; e < num_elem; e++) { 619 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 620 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 621 // Compute B^T*D 622 for (CeedSize n = 0; n < elem_size; n++) { 623 for (CeedSize q = 0; q < num_qpts; q++) { 624 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 625 const CeedSize btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 626 CeedScalar sum = 0.0; 627 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 628 const CeedSize b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 629 const CeedSize eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 630 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 631 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 632 } 633 BTD_mat[btd_index] = sum; 634 } 635 } 636 } 637 // form element matrix itself (for each block component) 638 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 639 640 // put element matrix in coordinate data structure 641 for (CeedInt i = 0; i < elem_size; i++) { 642 for (CeedInt j = 0; j < elem_size; j++) { 643 vals[offset + count] = elem_mat[i * elem_size + j]; 644 count++; 645 } 646 } 647 } 648 } 649 } 650 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 651 CeedCall(CeedVectorRestoreArray(values, &vals)); 652 653 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 654 CeedCall(CeedVectorDestroy(&assembled_qf)); 655 656 return CEED_ERROR_SUCCESS; 657 } 658 659 /** 660 @brief Count number of entries for assembled CeedOperator 661 662 @param[in] op CeedOperator to assemble 663 @param[out] num_entries Number of entries in assembled representation 664 665 @return An error code: 0 - success, otherwise - failure 666 667 @ref Utility 668 **/ 669 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 670 bool is_composite; 671 CeedElemRestriction rstr; 672 CeedInt num_elem, elem_size, num_comp; 673 674 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 675 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 676 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 677 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 678 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 679 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 680 *num_entries = (CeedSize)elem_size * num_comp * elem_size * num_comp * num_elem; 681 682 return CEED_ERROR_SUCCESS; 683 } 684 685 /** 686 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 687 688 @param[in] op_fine Fine grid operator 689 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 690 @param[in] rstr_coarse Coarse grid restriction 691 @param[in] basis_coarse Coarse grid active vector basis 692 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 693 @param[out] op_coarse Coarse grid operator 694 @param[out] op_prolong Coarse to fine operator, or NULL 695 @param[out] op_restrict Fine to coarse operator, or NULL 696 697 @return An error code: 0 - success, otherwise - failure 698 699 @ref Developer 700 **/ 701 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 702 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 703 Ceed ceed; 704 CeedVector mult_vec = NULL; 705 CeedElemRestriction rstr_p_mult_fine = NULL; 706 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 707 708 // Check for composite operator 709 bool is_composite; 710 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 711 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 712 713 // Coarse Grid 714 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 715 CeedElemRestriction rstr_fine = NULL; 716 // -- Clone input fields 717 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 718 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 719 rstr_fine = op_fine->input_fields[i]->elem_rstr; 720 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 721 } else { 722 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 723 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 724 } 725 } 726 // -- Clone output fields 727 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 728 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 729 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 730 } else { 731 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 732 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 733 } 734 } 735 // -- Clone QFunctionAssemblyData 736 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 737 738 // Multiplicity vector 739 if (op_restrict || op_prolong) { 740 CeedVector mult_e_vec; 741 742 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 743 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 744 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 745 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 746 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 747 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 748 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 749 CeedCall(CeedVectorDestroy(&mult_e_vec)); 750 CeedCall(CeedVectorReciprocal(mult_vec)); 751 } 752 753 // Clone name 754 bool has_name = op_fine->name; 755 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 756 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 757 758 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 759 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 760 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 761 762 // Restriction/Prolongation Operators 763 CeedInt num_comp; 764 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 765 766 // Restriction 767 if (op_restrict) { 768 CeedInt *num_comp_r_data; 769 CeedQFunction qf_restrict; 770 CeedQFunctionContext ctx_r; 771 772 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 773 CeedCall(CeedCalloc(1, &num_comp_r_data)); 774 num_comp_r_data[0] = num_comp; 775 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 776 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 777 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 778 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 779 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 780 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 781 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 782 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 783 784 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 785 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 786 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 787 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 788 789 // Set name 790 char *restriction_name; 791 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 792 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 793 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 794 CeedCall(CeedFree(&restriction_name)); 795 796 // Check 797 CeedCall(CeedOperatorCheckReady(*op_restrict)); 798 799 // Cleanup 800 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 801 } 802 803 // Prolongation 804 if (op_prolong) { 805 CeedInt *num_comp_p_data; 806 CeedQFunction qf_prolong; 807 CeedQFunctionContext ctx_p; 808 809 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 810 CeedCall(CeedCalloc(1, &num_comp_p_data)); 811 num_comp_p_data[0] = num_comp; 812 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 813 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 814 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 815 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 816 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 817 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 818 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 819 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 820 821 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 822 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 823 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec)); 824 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 825 826 // Set name 827 char *prolongation_name; 828 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 829 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 830 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 831 CeedCall(CeedFree(&prolongation_name)); 832 833 // Check 834 CeedCall(CeedOperatorCheckReady(*op_prolong)); 835 836 // Cleanup 837 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 838 } 839 840 // Check 841 CeedCall(CeedOperatorCheckReady(*op_coarse)); 842 843 // Cleanup 844 CeedCall(CeedVectorDestroy(&mult_vec)); 845 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 846 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 847 848 return CEED_ERROR_SUCCESS; 849 } 850 851 /** 852 @brief Build 1D mass matrix and Laplacian with perturbation 853 854 @param[in] interp_1d Interpolation matrix in one dimension 855 @param[in] grad_1d Gradient matrix in one dimension 856 @param[in] q_weight_1d Quadrature weights in one dimension 857 @param[in] P_1d Number of basis nodes in one dimension 858 @param[in] Q_1d Number of quadrature points in one dimension 859 @param[in] dim Dimension of basis 860 @param[out] mass Assembled mass matrix in one dimension 861 @param[out] laplace Assembled perturbed Laplacian in one dimension 862 863 @return An error code: 0 - success, otherwise - failure 864 865 @ref Developer 866 **/ 867 CeedPragmaOptimizeOff 868 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 869 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 870 for (CeedInt i = 0; i < P_1d; i++) { 871 for (CeedInt j = 0; j < P_1d; j++) { 872 CeedScalar sum = 0.0; 873 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 874 mass[i + j * P_1d] = sum; 875 } 876 } 877 // -- Laplacian 878 for (CeedInt i = 0; i < P_1d; i++) { 879 for (CeedInt j = 0; j < P_1d; j++) { 880 CeedScalar sum = 0.0; 881 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 882 laplace[i + j * P_1d] = sum; 883 } 884 } 885 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 886 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 887 return CEED_ERROR_SUCCESS; 888 } 889 CeedPragmaOptimizeOn 890 891 /// @} 892 893 /// ---------------------------------------------------------------------------- 894 /// CeedOperator Backend API 895 /// ---------------------------------------------------------------------------- 896 /// @addtogroup CeedOperatorBackend 897 /// @{ 898 899 /** 900 @brief Create object holding CeedQFunction assembly data for CeedOperator 901 902 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 903 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 904 905 @return An error code: 0 - success, otherwise - failure 906 907 @ref Backend 908 **/ 909 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 910 CeedCall(CeedCalloc(1, data)); 911 (*data)->ref_count = 1; 912 (*data)->ceed = ceed; 913 CeedCall(CeedReference(ceed)); 914 915 return CEED_ERROR_SUCCESS; 916 } 917 918 /** 919 @brief Increment the reference counter for a CeedQFunctionAssemblyData 920 921 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 922 923 @return An error code: 0 - success, otherwise - failure 924 925 @ref Backend 926 **/ 927 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 928 data->ref_count++; 929 return CEED_ERROR_SUCCESS; 930 } 931 932 /** 933 @brief Set re-use of CeedQFunctionAssemblyData 934 935 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 936 @param[in] reuse_data Boolean flag indicating data re-use 937 938 @return An error code: 0 - success, otherwise - failure 939 940 @ref Backend 941 **/ 942 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 943 data->reuse_data = reuse_data; 944 data->needs_data_update = true; 945 return CEED_ERROR_SUCCESS; 946 } 947 948 /** 949 @brief Mark QFunctionAssemblyData as stale 950 951 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 952 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 953 954 @return An error code: 0 - success, otherwise - failure 955 956 @ref Backend 957 **/ 958 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 959 data->needs_data_update = needs_data_update; 960 return CEED_ERROR_SUCCESS; 961 } 962 963 /** 964 @brief Determine if QFunctionAssemblyData needs update 965 966 @param[in] data CeedQFunctionAssemblyData to mark as stale 967 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 968 969 @return An error code: 0 - success, otherwise - failure 970 971 @ref Backend 972 **/ 973 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 974 *is_update_needed = !data->reuse_data || data->needs_data_update; 975 return CEED_ERROR_SUCCESS; 976 } 977 978 /** 979 @brief Copy the pointer to a CeedQFunctionAssemblyData. 980 981 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 982 983 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 984 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 985 CeedQFunctionAssemblyData. 986 987 @param[in] data CeedQFunctionAssemblyData to copy reference to 988 @param[in,out] data_copy Variable to store copied reference 989 990 @return An error code: 0 - success, otherwise - failure 991 992 @ref Backend 993 **/ 994 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 995 CeedCall(CeedQFunctionAssemblyDataReference(data)); 996 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 997 *data_copy = data; 998 return CEED_ERROR_SUCCESS; 999 } 1000 1001 /** 1002 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1003 1004 @param[in] data CeedQFunctionAssemblyData to retrieve status 1005 @param[out] is_setup Boolean flag for setup status 1006 1007 @return An error code: 0 - success, otherwise - failure 1008 1009 @ref Backend 1010 **/ 1011 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1012 *is_setup = data->is_setup; 1013 return CEED_ERROR_SUCCESS; 1014 } 1015 1016 /** 1017 @brief Set internal objects for CeedQFunctionAssemblyData 1018 1019 @param[in,out] data CeedQFunctionAssemblyData to set objects 1020 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1021 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1022 1023 @return An error code: 0 - success, otherwise - failure 1024 1025 @ref Backend 1026 **/ 1027 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1028 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1029 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1030 1031 data->is_setup = true; 1032 return CEED_ERROR_SUCCESS; 1033 } 1034 1035 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1036 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1037 1038 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1039 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1040 1041 return CEED_ERROR_SUCCESS; 1042 } 1043 1044 /** 1045 @brief Destroy CeedQFunctionAssemblyData 1046 1047 @param[in,out] data CeedQFunctionAssemblyData to destroy 1048 1049 @return An error code: 0 - success, otherwise - failure 1050 1051 @ref Backend 1052 **/ 1053 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1054 if (!*data || --(*data)->ref_count > 0) { 1055 *data = NULL; 1056 return CEED_ERROR_SUCCESS; 1057 } 1058 CeedCall(CeedDestroy(&(*data)->ceed)); 1059 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1060 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1061 1062 CeedCall(CeedFree(data)); 1063 return CEED_ERROR_SUCCESS; 1064 } 1065 1066 /** 1067 @brief Get CeedOperatorAssemblyData 1068 1069 @param[in] op CeedOperator to assemble 1070 @param[out] data CeedQFunctionAssemblyData 1071 1072 @return An error code: 0 - success, otherwise - failure 1073 1074 @ref Backend 1075 **/ 1076 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1077 if (!op->op_assembled) { 1078 CeedOperatorAssemblyData data; 1079 1080 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1081 op->op_assembled = data; 1082 } 1083 *data = op->op_assembled; 1084 1085 return CEED_ERROR_SUCCESS; 1086 } 1087 1088 /** 1089 @brief Create object holding CeedOperator assembly data. 1090 1091 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1092 An array with references to the corresponding active CeedElemRestrictions is also stored. 1093 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1094 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1095 CeedEvalMode. 1096 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1097 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1098 1099 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1100 @param[in] op CeedOperator to be assembled 1101 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1102 1103 @return An error code: 0 - success, otherwise - failure 1104 1105 @ref Backend 1106 **/ 1107 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1108 CeedInt num_active_bases = 0; 1109 1110 // Allocate 1111 CeedCall(CeedCalloc(1, data)); 1112 (*data)->ceed = ceed; 1113 CeedCall(CeedReference(ceed)); 1114 1115 // Build OperatorAssembly data 1116 CeedQFunction qf; 1117 CeedQFunctionField *qf_fields; 1118 CeedOperatorField *op_fields; 1119 CeedInt num_input_fields; 1120 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1121 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1122 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1123 1124 // Determine active input basis 1125 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1126 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1127 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1128 for (CeedInt i = 0; i < num_input_fields; i++) { 1129 CeedVector vec; 1130 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1131 if (vec == CEED_VECTOR_ACTIVE) { 1132 CeedBasis basis_in = NULL; 1133 CeedEvalMode eval_mode; 1134 CeedInt index = -1, dim, num_comp, q_comp; 1135 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1136 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1137 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1138 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1139 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1140 for (CeedInt i = 0; i < num_active_bases; i++) { 1141 if ((*data)->active_bases[i] == basis_in) index = i; 1142 } 1143 if (index == -1) { 1144 CeedElemRestriction elem_rstr_in; 1145 index = num_active_bases; 1146 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1147 (*data)->active_bases[num_active_bases] = NULL; 1148 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1149 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1150 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1151 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1152 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1153 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1154 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1155 num_eval_modes_in[index] = 0; 1156 num_eval_modes_out[index] = 0; 1157 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1158 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1159 eval_modes_in[index] = NULL; 1160 eval_modes_out[index] = NULL; 1161 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1162 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1163 eval_mode_offsets_in[index] = NULL; 1164 eval_mode_offsets_out[index] = NULL; 1165 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1166 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1167 (*data)->assembled_bases_in[index] = NULL; 1168 (*data)->assembled_bases_out[index] = NULL; 1169 num_active_bases++; 1170 } 1171 if (eval_mode != CEED_EVAL_WEIGHT) { 1172 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1173 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1174 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1175 for (CeedInt d = 0; d < q_comp; d++) { 1176 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1177 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1178 offset += num_comp; 1179 } 1180 num_eval_modes_in[index] += q_comp; 1181 } 1182 } 1183 } 1184 (*data)->num_eval_modes_in = num_eval_modes_in; 1185 (*data)->eval_modes_in = eval_modes_in; 1186 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1187 1188 // Determine active output basis 1189 CeedInt num_output_fields; 1190 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1191 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1192 offset = 0; 1193 for (CeedInt i = 0; i < num_output_fields; i++) { 1194 CeedVector vec; 1195 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1196 if (vec == CEED_VECTOR_ACTIVE) { 1197 CeedBasis basis_out = NULL; 1198 CeedEvalMode eval_mode; 1199 CeedInt index = -1, dim, num_comp, q_comp; 1200 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1201 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1202 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1203 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1204 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1205 for (CeedInt i = 0; i < num_active_bases; i++) { 1206 if ((*data)->active_bases[i] == basis_out) index = i; 1207 } 1208 if (index == -1) { 1209 CeedElemRestriction elem_rstr_out; 1210 1211 index = num_active_bases; 1212 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1213 (*data)->active_bases[num_active_bases] = NULL; 1214 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1215 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1216 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1217 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1218 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1219 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1220 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1221 num_eval_modes_in[index] = 0; 1222 num_eval_modes_out[index] = 0; 1223 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1224 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1225 eval_modes_in[index] = NULL; 1226 eval_modes_out[index] = NULL; 1227 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1228 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1229 eval_mode_offsets_in[index] = NULL; 1230 eval_mode_offsets_out[index] = NULL; 1231 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1232 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1233 (*data)->assembled_bases_in[index] = NULL; 1234 (*data)->assembled_bases_out[index] = NULL; 1235 num_active_bases++; 1236 } 1237 if (eval_mode != CEED_EVAL_WEIGHT) { 1238 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1239 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1240 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1241 for (CeedInt d = 0; d < q_comp; d++) { 1242 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1243 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1244 offset += num_comp; 1245 } 1246 num_eval_modes_out[index] += q_comp; 1247 } 1248 } 1249 } 1250 (*data)->num_output_components = offset; 1251 (*data)->num_eval_modes_out = num_eval_modes_out; 1252 (*data)->eval_modes_out = eval_modes_out; 1253 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1254 (*data)->num_active_bases = num_active_bases; 1255 1256 return CEED_ERROR_SUCCESS; 1257 } 1258 1259 /** 1260 @brief Get CeedOperator CeedEvalModes for assembly. 1261 1262 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1263 1264 @param[in] data CeedOperatorAssemblyData 1265 @param[out] num_active_bases Total number of active bases 1266 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1267 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1268 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1269 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1270 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1271 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1272 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1273 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1274 including contributions of all active bases 1275 1276 @return An error code: 0 - success, otherwise - failure 1277 1278 1279 @ref Backend 1280 **/ 1281 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1282 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1283 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1284 if (num_active_bases) *num_active_bases = data->num_active_bases; 1285 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1286 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1287 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1288 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1289 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1290 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1291 if (num_output_components) *num_output_components = data->num_output_components; 1292 1293 return CEED_ERROR_SUCCESS; 1294 } 1295 1296 /** 1297 @brief Get CeedOperator CeedBasis data for assembly. 1298 1299 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1300 1301 @param[in] data CeedOperatorAssemblyData 1302 @param[out] num_active_bases Number of active bases, or NULL 1303 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1304 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1305 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1306 1307 @return An error code: 0 - success, otherwise - failure 1308 1309 @ref Backend 1310 **/ 1311 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1312 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1313 // Assemble B_in, B_out if needed 1314 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1315 CeedInt num_qpts; 1316 1317 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1318 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1319 CeedInt num_nodes; 1320 CeedScalar *B_in = NULL, *identity = NULL; 1321 bool has_eval_none = false; 1322 1323 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1324 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1325 1326 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1327 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1328 } 1329 if (has_eval_none) { 1330 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1331 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1332 identity[i * num_nodes + i] = 1.0; 1333 } 1334 } 1335 1336 for (CeedInt q = 0; q < num_qpts; q++) { 1337 for (CeedInt n = 0; n < num_nodes; n++) { 1338 CeedInt d_in = 0, q_comp_in; 1339 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1340 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1341 const CeedInt qq = data->num_eval_modes_in[b] * q; 1342 const CeedScalar *B = NULL; 1343 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B); 1344 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1345 if (q_comp_in > 1) { 1346 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1347 else B = &B[(++d_in) * num_qpts * num_nodes]; 1348 } 1349 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1350 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1351 } 1352 } 1353 } 1354 if (identity) CeedCall(CeedFree(identity)); 1355 data->assembled_bases_in[b] = B_in; 1356 } 1357 } 1358 1359 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1360 CeedInt num_qpts; 1361 1362 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1363 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1364 CeedInt num_nodes; 1365 bool has_eval_none = false; 1366 CeedScalar *B_out = NULL, *identity = NULL; 1367 1368 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes)); 1369 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1370 1371 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1372 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1373 } 1374 if (has_eval_none) { 1375 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1376 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1377 identity[i * num_nodes + i] = 1.0; 1378 } 1379 } 1380 1381 for (CeedInt q = 0; q < num_qpts; q++) { 1382 for (CeedInt n = 0; n < num_nodes; n++) { 1383 CeedInt d_out = 0, q_comp_out; 1384 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1385 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1386 const CeedInt qq = data->num_eval_modes_out[b] * q; 1387 const CeedScalar *B = NULL; 1388 CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B); 1389 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1390 if (q_comp_out > 1) { 1391 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1392 else B = &B[(++d_out) * num_qpts * num_nodes]; 1393 } 1394 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1395 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1396 } 1397 } 1398 } 1399 if (identity) CeedCall(CeedFree(identity)); 1400 data->assembled_bases_out[b] = B_out; 1401 } 1402 } 1403 1404 // Pass out assembled data 1405 if (active_bases) *active_bases = data->active_bases; 1406 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1407 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1408 1409 return CEED_ERROR_SUCCESS; 1410 } 1411 1412 /** 1413 @brief Get CeedOperator CeedBasis data for assembly. 1414 1415 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1416 1417 @param[in] data CeedOperatorAssemblyData 1418 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1419 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1420 1421 @return An error code: 0 - success, otherwise - failure 1422 1423 @ref Backend 1424 **/ 1425 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1426 CeedElemRestriction **active_elem_rstrs) { 1427 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1428 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1429 1430 return CEED_ERROR_SUCCESS; 1431 } 1432 1433 /** 1434 @brief Destroy CeedOperatorAssemblyData 1435 1436 @param[in,out] data CeedOperatorAssemblyData to destroy 1437 1438 @return An error code: 0 - success, otherwise - failure 1439 1440 @ref Backend 1441 **/ 1442 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1443 if (!*data) { 1444 *data = NULL; 1445 return CEED_ERROR_SUCCESS; 1446 } 1447 CeedCall(CeedDestroy(&(*data)->ceed)); 1448 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1449 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1450 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1451 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1452 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1453 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1454 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1455 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1456 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1457 } 1458 CeedCall(CeedFree(&(*data)->active_bases)); 1459 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1460 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1461 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1462 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1463 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1464 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1465 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1466 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1467 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1468 1469 CeedCall(CeedFree(data)); 1470 return CEED_ERROR_SUCCESS; 1471 } 1472 1473 /// @} 1474 1475 /// ---------------------------------------------------------------------------- 1476 /// CeedOperator Public API 1477 /// ---------------------------------------------------------------------------- 1478 /// @addtogroup CeedOperatorUser 1479 /// @{ 1480 1481 /** 1482 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1483 1484 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1485 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1486 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1487 1488 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1489 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1490 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1491 1492 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1493 1494 @param[in] op CeedOperator to assemble CeedQFunction 1495 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1496 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1497 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1498 1499 @return An error code: 0 - success, otherwise - failure 1500 1501 @ref User 1502 **/ 1503 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1504 CeedCall(CeedOperatorCheckReady(op)); 1505 1506 if (op->LinearAssembleQFunction) { 1507 // Backend version 1508 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1509 } else { 1510 // Operator fallback 1511 CeedOperator op_fallback; 1512 1513 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1514 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1515 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1516 } 1517 return CEED_ERROR_SUCCESS; 1518 } 1519 1520 /** 1521 @brief Assemble CeedQFunction and store result internally. 1522 1523 Return copied references of stored data to the caller. 1524 Caller is responsible for ownership and destruction of the copied references. 1525 See also @ref CeedOperatorLinearAssembleQFunction 1526 1527 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1528 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1529 1530 @param[in] op CeedOperator to assemble CeedQFunction 1531 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1532 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1533 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1534 1535 @return An error code: 0 - success, otherwise - failure 1536 1537 @ref User 1538 **/ 1539 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1540 CeedCall(CeedOperatorCheckReady(op)); 1541 1542 if (op->LinearAssembleQFunctionUpdate) { 1543 // Backend version 1544 bool qf_assembled_is_setup; 1545 CeedVector assembled_vec = NULL; 1546 CeedElemRestriction assembled_rstr = NULL; 1547 1548 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1549 if (qf_assembled_is_setup) { 1550 bool update_needed; 1551 1552 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1553 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1554 if (update_needed) CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1555 } else { 1556 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1557 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1558 } 1559 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1560 1561 // Copy reference from internally held copy 1562 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1563 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1564 CeedCall(CeedVectorDestroy(&assembled_vec)); 1565 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1566 } else { 1567 // Operator fallback 1568 CeedOperator op_fallback; 1569 1570 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1571 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1572 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1573 } 1574 1575 return CEED_ERROR_SUCCESS; 1576 } 1577 1578 /** 1579 @brief Assemble the diagonal of a square linear CeedOperator 1580 1581 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1582 1583 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1584 1585 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1586 1587 @param[in] op CeedOperator to assemble CeedQFunction 1588 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1589 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1590 1591 @return An error code: 0 - success, otherwise - failure 1592 1593 @ref User 1594 **/ 1595 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1596 bool is_composite; 1597 CeedCall(CeedOperatorCheckReady(op)); 1598 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1599 1600 CeedSize input_size = 0, output_size = 0; 1601 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1602 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1603 1604 // Early exit for empty operator 1605 if (!is_composite) { 1606 CeedInt num_elem = 0; 1607 1608 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1609 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1610 } 1611 1612 if (op->LinearAssembleDiagonal) { 1613 // Backend version 1614 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1615 return CEED_ERROR_SUCCESS; 1616 } else if (op->LinearAssembleAddDiagonal) { 1617 // Backend version with zeroing first 1618 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1619 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1620 return CEED_ERROR_SUCCESS; 1621 } else { 1622 // Operator fallback 1623 CeedOperator op_fallback; 1624 1625 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1626 if (op_fallback) { 1627 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1628 return CEED_ERROR_SUCCESS; 1629 } 1630 } 1631 // Default interface implementation 1632 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1633 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1634 1635 return CEED_ERROR_SUCCESS; 1636 } 1637 1638 /** 1639 @brief Assemble the diagonal of a square linear CeedOperator 1640 1641 This sums into a CeedVector the diagonal of a linear CeedOperator. 1642 1643 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1644 1645 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1646 1647 @param[in] op CeedOperator to assemble CeedQFunction 1648 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1649 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1650 1651 @return An error code: 0 - success, otherwise - failure 1652 1653 @ref User 1654 **/ 1655 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1656 bool is_composite; 1657 CeedCall(CeedOperatorCheckReady(op)); 1658 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1659 1660 CeedSize input_size = 0, output_size = 0; 1661 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1662 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1663 1664 // Early exit for empty operator 1665 if (!is_composite) { 1666 CeedInt num_elem = 0; 1667 1668 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1669 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1670 } 1671 1672 if (op->LinearAssembleAddDiagonal) { 1673 // Backend version 1674 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1675 return CEED_ERROR_SUCCESS; 1676 } else { 1677 // Operator fallback 1678 CeedOperator op_fallback; 1679 1680 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1681 if (op_fallback) { 1682 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1683 return CEED_ERROR_SUCCESS; 1684 } 1685 } 1686 // Default interface implementation 1687 if (is_composite) { 1688 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1689 } else { 1690 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1691 } 1692 1693 return CEED_ERROR_SUCCESS; 1694 } 1695 1696 /** 1697 @brief Assemble the point block diagonal of a square linear CeedOperator 1698 1699 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1700 1701 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1702 1703 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1704 1705 @param[in] op CeedOperator to assemble CeedQFunction 1706 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1707 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1708 component in]. 1709 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1710 1711 @return An error code: 0 - success, otherwise - failure 1712 1713 @ref User 1714 **/ 1715 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1716 bool is_composite; 1717 CeedCall(CeedOperatorCheckReady(op)); 1718 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1719 1720 CeedSize input_size = 0, output_size = 0; 1721 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1722 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1723 1724 // Early exit for empty operator 1725 if (!is_composite) { 1726 CeedInt num_elem = 0; 1727 1728 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1729 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1730 } 1731 1732 if (op->LinearAssemblePointBlockDiagonal) { 1733 // Backend version 1734 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1735 return CEED_ERROR_SUCCESS; 1736 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1737 // Backend version with zeroing first 1738 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1739 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1740 return CEED_ERROR_SUCCESS; 1741 } else { 1742 // Operator fallback 1743 CeedOperator op_fallback; 1744 1745 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1746 if (op_fallback) { 1747 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1748 return CEED_ERROR_SUCCESS; 1749 } 1750 } 1751 // Default interface implementation 1752 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1753 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1754 1755 return CEED_ERROR_SUCCESS; 1756 } 1757 1758 /** 1759 @brief Assemble the point block diagonal of a square linear CeedOperator 1760 1761 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1762 1763 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1764 1765 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1766 1767 @param[in] op CeedOperator to assemble CeedQFunction 1768 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1769 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1770 component in]. 1771 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1772 1773 @return An error code: 0 - success, otherwise - failure 1774 1775 @ref User 1776 **/ 1777 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1778 bool is_composite; 1779 CeedCall(CeedOperatorCheckReady(op)); 1780 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1781 1782 CeedSize input_size = 0, output_size = 0; 1783 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1784 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1785 1786 // Early exit for empty operator 1787 if (!is_composite) { 1788 CeedInt num_elem = 0; 1789 1790 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1791 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1792 } 1793 1794 if (op->LinearAssembleAddPointBlockDiagonal) { 1795 // Backend version 1796 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1797 return CEED_ERROR_SUCCESS; 1798 } else { 1799 // Operator fallback 1800 CeedOperator op_fallback; 1801 1802 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1803 if (op_fallback) { 1804 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1805 return CEED_ERROR_SUCCESS; 1806 } 1807 } 1808 // Default interface implementation 1809 if (is_composite) { 1810 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1811 } else { 1812 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1813 } 1814 1815 return CEED_ERROR_SUCCESS; 1816 } 1817 1818 /** 1819 @brief Fully assemble the nonzero pattern of a linear operator. 1820 1821 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1822 1823 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1824 matrix in entry (i, j). 1825 Note that the (i, j) pairs are not unique and may repeat. 1826 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1827 1828 This will generally be slow unless your operator is low-order. 1829 1830 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1831 1832 @param[in] op CeedOperator to assemble 1833 @param[out] num_entries Number of entries in coordinate nonzero pattern 1834 @param[out] rows Row number for each entry 1835 @param[out] cols Column number for each entry 1836 1837 @ref User 1838 **/ 1839 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1840 CeedInt num_suboperators; 1841 CeedSize single_entries; 1842 CeedOperator *sub_operators; 1843 bool is_composite; 1844 CeedCall(CeedOperatorCheckReady(op)); 1845 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1846 1847 if (op->LinearAssembleSymbolic) { 1848 // Backend version 1849 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1850 return CEED_ERROR_SUCCESS; 1851 } else { 1852 // Operator fallback 1853 CeedOperator op_fallback; 1854 1855 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1856 if (op_fallback) { 1857 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1858 return CEED_ERROR_SUCCESS; 1859 } 1860 } 1861 1862 // Default interface implementation 1863 1864 // count entries and allocate rows, cols arrays 1865 *num_entries = 0; 1866 if (is_composite) { 1867 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1868 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1869 for (CeedInt k = 0; k < num_suboperators; ++k) { 1870 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1871 *num_entries += single_entries; 1872 } 1873 } else { 1874 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1875 *num_entries += single_entries; 1876 } 1877 CeedCall(CeedCalloc(*num_entries, rows)); 1878 CeedCall(CeedCalloc(*num_entries, cols)); 1879 1880 // assemble nonzero locations 1881 CeedInt offset = 0; 1882 if (is_composite) { 1883 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1884 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1885 for (CeedInt k = 0; k < num_suboperators; ++k) { 1886 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1887 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1888 offset += single_entries; 1889 } 1890 } else { 1891 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1892 } 1893 1894 return CEED_ERROR_SUCCESS; 1895 } 1896 1897 /** 1898 @brief Fully assemble the nonzero entries of a linear operator. 1899 1900 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1901 1902 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1903 matrix in entry (i, j). 1904 Note that the (i, j) pairs are not unique and may repeat. 1905 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1906 1907 This will generally be slow unless your operator is low-order. 1908 1909 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1910 1911 @param[in] op CeedOperator to assemble 1912 @param[out] values Values to assemble into matrix 1913 1914 @ref User 1915 **/ 1916 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1917 CeedInt num_suboperators; 1918 CeedSize single_entries = 0; 1919 CeedOperator *sub_operators; 1920 bool is_composite; 1921 CeedCall(CeedOperatorCheckReady(op)); 1922 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1923 1924 // Early exit for empty operator 1925 if (!is_composite) { 1926 CeedInt num_elem = 0; 1927 1928 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1929 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1930 } 1931 1932 if (op->LinearAssemble) { 1933 // Backend version 1934 CeedCall(op->LinearAssemble(op, values)); 1935 return CEED_ERROR_SUCCESS; 1936 } else { 1937 // Operator fallback 1938 CeedOperator op_fallback; 1939 1940 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1941 if (op_fallback) { 1942 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 1943 return CEED_ERROR_SUCCESS; 1944 } 1945 } 1946 1947 // Default interface implementation 1948 CeedInt offset = 0; 1949 CeedCall(CeedVectorSetValue(values, 0.0)); 1950 if (is_composite) { 1951 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1952 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1953 for (CeedInt k = 0; k < num_suboperators; k++) { 1954 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 1955 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1956 offset += single_entries; 1957 } 1958 } else { 1959 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 1960 } 1961 1962 return CEED_ERROR_SUCCESS; 1963 } 1964 1965 /** 1966 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 1967 1968 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1969 1970 @param[in] op Composite CeedOperator 1971 @param[in] num_skip_indices Number of suboperators to skip 1972 @param[in] skip_indices Array of indices of suboperators to skip 1973 @param[out] mult Vector to store multiplicity (of size l_size) 1974 1975 @return An error code: 0 - success, otherwise - failure 1976 1977 @ref User 1978 **/ 1979 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 1980 CeedCall(CeedOperatorCheckReady(op)); 1981 1982 Ceed ceed; 1983 CeedInt num_suboperators; 1984 CeedSize l_vec_len; 1985 CeedScalar *mult_array; 1986 CeedVector ones_l_vec; 1987 CeedElemRestriction elem_rstr; 1988 CeedOperator *sub_operators; 1989 1990 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1991 1992 // Zero mult vector 1993 CeedCall(CeedVectorSetValue(mult, 0.0)); 1994 1995 // Get suboperators 1996 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1997 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1998 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 1999 2000 // Work vector 2001 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2002 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2003 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2004 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2005 2006 // Compute multiplicity across suboperators 2007 for (CeedInt i = 0; i < num_suboperators; i++) { 2008 const CeedScalar *sub_mult_array; 2009 CeedVector sub_mult_l_vec, ones_e_vec; 2010 2011 // -- Check for suboperator to skip 2012 for (CeedInt j = 0; j < num_skip_indices; j++) { 2013 if (skip_indices[j] == i) continue; 2014 } 2015 2016 // -- Sub operator multiplicity 2017 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2018 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2019 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2020 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2021 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2022 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2023 // ---- Flag every node present in the current suboperator 2024 for (CeedInt j = 0; j < l_vec_len; j++) { 2025 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2026 } 2027 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2028 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2029 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2030 } 2031 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2032 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2033 2034 return CEED_ERROR_SUCCESS; 2035 } 2036 2037 /** 2038 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2039 grid interpolation 2040 2041 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2042 2043 @param[in] op_fine Fine grid operator 2044 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2045 @param[in] rstr_coarse Coarse grid restriction 2046 @param[in] basis_coarse Coarse grid active vector basis 2047 @param[out] op_coarse Coarse grid operator 2048 @param[out] op_prolong Coarse to fine operator, or NULL 2049 @param[out] op_restrict Fine to coarse operator, or NULL 2050 2051 @return An error code: 0 - success, otherwise - failure 2052 2053 @ref User 2054 **/ 2055 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2056 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2057 CeedCall(CeedOperatorCheckReady(op_fine)); 2058 2059 // Build prolongation matrix, if required 2060 CeedBasis basis_c_to_f = NULL; 2061 if (op_prolong || op_restrict) { 2062 CeedBasis basis_fine; 2063 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2064 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2065 } 2066 2067 // Core code 2068 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2069 2070 return CEED_ERROR_SUCCESS; 2071 } 2072 2073 /** 2074 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2075 2076 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2077 2078 @param[in] op_fine Fine grid operator 2079 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2080 @param[in] rstr_coarse Coarse grid restriction 2081 @param[in] basis_coarse Coarse grid active vector basis 2082 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2083 @param[out] op_coarse Coarse grid operator 2084 @param[out] op_prolong Coarse to fine operator, or NULL 2085 @param[out] op_restrict Fine to coarse operator, or NULL 2086 2087 @return An error code: 0 - success, otherwise - failure 2088 2089 @ref User 2090 **/ 2091 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2092 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2093 CeedOperator *op_restrict) { 2094 CeedCall(CeedOperatorCheckReady(op_fine)); 2095 Ceed ceed; 2096 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2097 2098 // Check for compatible quadrature spaces 2099 CeedBasis basis_fine; 2100 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2101 CeedInt Q_f, Q_c; 2102 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2103 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2104 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2105 2106 // Create coarse to fine basis, if required 2107 CeedBasis basis_c_to_f = NULL; 2108 if (op_prolong || op_restrict) { 2109 // Check if interpolation matrix is provided 2110 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2111 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2112 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2113 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2114 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2115 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2116 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2117 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2118 CeedScalar *q_ref, *q_weight, *grad; 2119 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2120 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2121 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2122 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2123 CeedCall(CeedFree(&q_ref)); 2124 CeedCall(CeedFree(&q_weight)); 2125 CeedCall(CeedFree(&grad)); 2126 } 2127 2128 // Core code 2129 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2130 return CEED_ERROR_SUCCESS; 2131 } 2132 2133 /** 2134 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2135 2136 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2137 2138 @param[in] op_fine Fine grid operator 2139 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2140 @param[in] rstr_coarse Coarse grid restriction 2141 @param[in] basis_coarse Coarse grid active vector basis 2142 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2143 @param[out] op_coarse Coarse grid operator 2144 @param[out] op_prolong Coarse to fine operator, or NULL 2145 @param[out] op_restrict Fine to coarse operator, or NULL 2146 2147 @return An error code: 0 - success, otherwise - failure 2148 2149 @ref User 2150 **/ 2151 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2152 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2153 CeedOperator *op_restrict) { 2154 CeedCall(CeedOperatorCheckReady(op_fine)); 2155 Ceed ceed; 2156 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2157 2158 // Check for compatible quadrature spaces 2159 CeedBasis basis_fine; 2160 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2161 CeedInt Q_f, Q_c; 2162 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2163 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2164 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2165 2166 // Coarse to fine basis 2167 CeedBasis basis_c_to_f = NULL; 2168 if (op_prolong || op_restrict) { 2169 // Check if interpolation matrix is provided 2170 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2171 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2172 CeedElemTopology topo; 2173 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2174 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2175 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2176 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2177 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2178 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2179 CeedScalar *q_ref, *q_weight, *grad; 2180 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2181 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2182 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2183 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2184 CeedCall(CeedFree(&q_ref)); 2185 CeedCall(CeedFree(&q_weight)); 2186 CeedCall(CeedFree(&grad)); 2187 } 2188 2189 // Core code 2190 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2191 return CEED_ERROR_SUCCESS; 2192 } 2193 2194 /** 2195 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2196 2197 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2198 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2199 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2200 \hat S V\f$. 2201 The CeedOperator must be linear and non-composite. 2202 The associated CeedQFunction must therefore also be linear. 2203 2204 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2205 2206 @param[in] op CeedOperator to create element inverses 2207 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2208 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2209 2210 @return An error code: 0 - success, otherwise - failure 2211 2212 @ref User 2213 **/ 2214 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2215 CeedCall(CeedOperatorCheckReady(op)); 2216 2217 if (op->CreateFDMElementInverse) { 2218 // Backend version 2219 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2220 return CEED_ERROR_SUCCESS; 2221 } else { 2222 // Operator fallback 2223 CeedOperator op_fallback; 2224 2225 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2226 if (op_fallback) { 2227 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2228 return CEED_ERROR_SUCCESS; 2229 } 2230 } 2231 2232 // Default interface implementation 2233 Ceed ceed, ceed_parent; 2234 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2235 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2236 ceed_parent = ceed_parent ? ceed_parent : ceed; 2237 CeedQFunction qf; 2238 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2239 2240 // Determine active input basis 2241 bool interp = false, grad = false; 2242 CeedBasis basis = NULL; 2243 CeedElemRestriction rstr = NULL; 2244 CeedOperatorField *op_fields; 2245 CeedQFunctionField *qf_fields; 2246 CeedInt num_input_fields; 2247 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2248 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2249 for (CeedInt i = 0; i < num_input_fields; i++) { 2250 CeedVector vec; 2251 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2252 if (vec == CEED_VECTOR_ACTIVE) { 2253 CeedEvalMode eval_mode; 2254 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2255 interp = interp || eval_mode == CEED_EVAL_INTERP; 2256 grad = grad || eval_mode == CEED_EVAL_GRAD; 2257 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2258 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2259 } 2260 } 2261 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2262 CeedSize l_size = 1; 2263 CeedInt P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2264 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2265 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2266 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2267 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2268 CeedCall(CeedBasisGetDimension(basis, &dim)); 2269 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2270 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2271 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2272 2273 // Build and diagonalize 1D Mass and Laplacian 2274 bool is_tensor_basis; 2275 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2276 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2277 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2278 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2279 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2280 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2281 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2282 CeedCall(CeedCalloc(P_1d, &lambda)); 2283 // -- Build matrices 2284 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2285 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2286 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2287 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2288 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2289 2290 // -- Diagonalize 2291 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2292 CeedCall(CeedFree(&mass)); 2293 CeedCall(CeedFree(&laplace)); 2294 for (CeedInt i = 0; i < P_1d; i++) { 2295 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2296 } 2297 CeedCall(CeedFree(&x)); 2298 2299 // Assemble QFunction 2300 CeedVector assembled = NULL; 2301 CeedElemRestriction rstr_qf = NULL; 2302 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2303 CeedInt layout[3]; 2304 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2305 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2306 CeedScalar max_norm = 0; 2307 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2308 2309 // Calculate element averages 2310 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2311 CeedScalar *elem_avg; 2312 const CeedScalar *assembled_array, *q_weight_array; 2313 CeedVector q_weight; 2314 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2315 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2316 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2317 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2318 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2319 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2320 for (CeedInt e = 0; e < num_elem; e++) { 2321 CeedInt count = 0; 2322 for (CeedInt q = 0; q < num_qpts; q++) { 2323 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2324 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2325 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2326 count++; 2327 } 2328 } 2329 } 2330 if (count) { 2331 elem_avg[e] /= count; 2332 } else { 2333 elem_avg[e] = 1.0; 2334 } 2335 } 2336 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2337 CeedCall(CeedVectorDestroy(&assembled)); 2338 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2339 CeedCall(CeedVectorDestroy(&q_weight)); 2340 2341 // Build FDM diagonal 2342 CeedVector q_data; 2343 CeedScalar *q_data_array, *fdm_diagonal; 2344 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2345 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2346 for (CeedInt c = 0; c < num_comp; c++) { 2347 for (CeedInt n = 0; n < num_nodes; n++) { 2348 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2349 if (grad) { 2350 for (CeedInt d = 0; d < dim; d++) { 2351 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2352 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2353 } 2354 } 2355 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2356 } 2357 } 2358 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2359 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2360 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2361 for (CeedInt e = 0; e < num_elem; e++) { 2362 for (CeedInt c = 0; c < num_comp; c++) { 2363 for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2364 } 2365 } 2366 CeedCall(CeedFree(&elem_avg)); 2367 CeedCall(CeedFree(&fdm_diagonal)); 2368 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2369 2370 // Setup FDM operator 2371 // -- Basis 2372 CeedBasis fdm_basis; 2373 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2374 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2375 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2376 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2377 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2378 CeedCall(CeedFree(&fdm_interp)); 2379 CeedCall(CeedFree(&grad_dummy)); 2380 CeedCall(CeedFree(&q_ref_dummy)); 2381 CeedCall(CeedFree(&q_weight_dummy)); 2382 CeedCall(CeedFree(&lambda)); 2383 2384 // -- Restriction 2385 CeedElemRestriction rstr_qd_i; 2386 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2387 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2388 // -- QFunction 2389 CeedQFunction qf_fdm; 2390 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2391 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2392 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2393 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2394 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2395 // -- QFunction context 2396 CeedInt *num_comp_data; 2397 CeedCall(CeedCalloc(1, &num_comp_data)); 2398 num_comp_data[0] = num_comp; 2399 CeedQFunctionContext ctx_fdm; 2400 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2401 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2402 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2403 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2404 // -- Operator 2405 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2406 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2407 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2408 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2409 2410 // Cleanup 2411 CeedCall(CeedVectorDestroy(&q_data)); 2412 CeedCall(CeedBasisDestroy(&fdm_basis)); 2413 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2414 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2415 2416 return CEED_ERROR_SUCCESS; 2417 } 2418 2419 /// @} 2420