1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <assert.h> 9 #include <ceed-impl.h> 10 #include <ceed/backend.h> 11 #include <ceed/ceed.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] eval_mode Current basis evaluation mode 184 @param[in] identity Pointer to identity matrix 185 @param[in] interp Pointer to interpolation matrix 186 @param[in] grad Pointer to gradient matrix 187 @param[out] basis_ptr Basis pointer to set 188 189 @ref Developer 190 **/ 191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad, 192 const CeedScalar **basis_ptr) { 193 switch (eval_mode) { 194 case CEED_EVAL_NONE: 195 *basis_ptr = identity; 196 break; 197 case CEED_EVAL_INTERP: 198 *basis_ptr = interp; 199 break; 200 case CEED_EVAL_GRAD: 201 *basis_ptr = grad; 202 break; 203 case CEED_EVAL_WEIGHT: 204 case CEED_EVAL_DIV: 205 case CEED_EVAL_CURL: 206 break; // Caught by QF Assembly 207 } 208 assert(*basis_ptr != NULL); 209 } 210 211 /** 212 @brief Create point block restriction for active operator field 213 214 @param[in] rstr Original CeedElemRestriction for active field 215 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 216 217 @return An error code: 0 - success, otherwise - failure 218 219 @ref Developer 220 **/ 221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 222 Ceed ceed; 223 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 224 const CeedInt *offsets; 225 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 226 227 // Expand offsets 228 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 229 CeedSize l_size; 230 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 231 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 232 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 233 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 234 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 235 CeedInt shift = num_comp; 236 if (comp_stride != 1) shift *= num_comp; 237 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 238 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 239 pointblock_offsets[i] = offsets[i] * shift; 240 } 241 242 // Create new restriction 243 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 244 pointblock_offsets, pointblock_rstr)); 245 246 // Cleanup 247 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 248 249 return CEED_ERROR_SUCCESS; 250 } 251 252 /** 253 @brief Core logic for assembling operator diagonal or point block diagonal 254 255 @param[in] op CeedOperator to assemble point block diagonal 256 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 257 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 258 @param[out] assembled CeedVector to store assembled diagonal 259 260 @return An error code: 0 - success, otherwise - failure 261 262 @ref Developer 263 **/ 264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 265 Ceed ceed; 266 CeedCall(CeedOperatorGetCeed(op, &ceed)); 267 268 // Assemble QFunction 269 CeedQFunction qf; 270 const CeedScalar *assembled_qf_array; 271 CeedVector assembled_qf; 272 CeedElemRestriction assembled_elem_rstr; 273 CeedInt num_input_fields, num_output_fields; 274 CeedInt layout[3]; 275 276 CeedCall(CeedOperatorGetQFunction(op, &qf)); 277 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 278 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 279 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 280 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 281 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 282 283 // Get assembly data 284 CeedOperatorAssemblyData data; 285 const CeedEvalMode **eval_modes_in, **eval_modes_out; 286 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 287 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 288 CeedBasis *active_bases; 289 CeedElemRestriction *active_elem_rstrs; 290 291 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 292 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 293 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 294 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 295 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 296 297 // Loop over all active bases 298 for (CeedInt b = 0; b < num_active_bases; b++) { 299 // Assemble point block diagonal restriction, if needed 300 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 301 302 if (is_pointblock) { 303 CeedElemRestriction point_block_elem_rstr; 304 305 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 306 diag_elem_rstr = point_block_elem_rstr; 307 } 308 309 // Create diagonal vector 310 CeedVector elem_diag; 311 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array; 315 CeedInt num_elem, num_nodes, num_qpts, num_components; 316 317 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 318 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 319 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 320 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 321 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 322 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 323 324 // Basis matrices 325 const CeedScalar *interp, *grad; 326 CeedScalar *identity = NULL; 327 bool has_eval_none = false; 328 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 329 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 330 } 331 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 332 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 333 } 334 if (has_eval_none) { 335 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 336 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 337 } 338 CeedCall(CeedBasisGetInterp(active_bases[b], &interp)); 339 CeedCall(CeedBasisGetGrad(active_bases[b], &grad)); 340 // Compute the diagonal of B^T D B 341 // Each element 342 for (CeedInt e = 0; e < num_elem; e++) { 343 CeedInt d_out = -1; 344 // Each basis eval mode pair 345 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 346 const CeedScalar *B_t = NULL; 347 if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1; 348 CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t); 349 CeedInt d_in = -1; 350 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 351 const CeedScalar *B = NULL; 352 if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1; 353 CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B); 354 // Each component 355 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 356 // Each qpt/node pair 357 for (CeedInt q = 0; q < num_qpts; q++) { 358 if (is_pointblock) { 359 // Point Block Diagonal 360 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 361 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 362 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 363 for (CeedInt n = 0; n < num_nodes; n++) { 364 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 365 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 366 } 367 } 368 } else { 369 // Diagonal Only 370 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 371 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 372 for (CeedInt n = 0; n < num_nodes; n++) { 373 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 374 } 375 } 376 } 377 } 378 } 379 } 380 } 381 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 382 383 // Assemble local operator diagonal 384 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 385 386 // Cleanup 387 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 388 CeedCall(CeedVectorDestroy(&elem_diag)); 389 CeedCall(CeedFree(&identity)); 390 } 391 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 392 CeedCall(CeedVectorDestroy(&assembled_qf)); 393 394 return CEED_ERROR_SUCCESS; 395 } 396 397 /** 398 @brief Core logic for assembling composite operator diagonal 399 400 @param[in] op CeedOperator to assemble point block diagonal 401 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 402 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 403 @param[out] assembled CeedVector to store assembled diagonal 404 405 @return An error code: 0 - success, otherwise - failure 406 407 @ref Developer 408 **/ 409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 410 CeedVector assembled) { 411 CeedInt num_sub; 412 CeedOperator *suboperators; 413 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 414 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 415 for (CeedInt i = 0; i < num_sub; i++) { 416 if (is_pointblock) { 417 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 418 } else { 419 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 420 } 421 } 422 return CEED_ERROR_SUCCESS; 423 } 424 425 /** 426 @brief Build nonzero pattern for non-composite operator 427 428 Users should generally use CeedOperatorLinearAssembleSymbolic() 429 430 @param[in] op CeedOperator to assemble nonzero pattern 431 @param[in] offset Offset for number of entries 432 @param[out] rows Row number for each entry 433 @param[out] cols Column number for each entry 434 435 @return An error code: 0 - success, otherwise - failure 436 437 @ref Developer 438 **/ 439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 440 Ceed ceed; 441 bool is_composite; 442 CeedCall(CeedOperatorGetCeed(op, &ceed)); 443 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 444 445 if (is_composite) { 446 // LCOV_EXCL_START 447 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 448 // LCOV_EXCL_STOP 449 } 450 451 CeedSize num_nodes; 452 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 453 CeedElemRestriction rstr_in; 454 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 455 CeedInt num_elem, elem_size, num_comp; 456 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 457 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 458 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 459 CeedInt layout_er[3]; 460 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 461 462 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 463 464 // Determine elem_dof relation 465 CeedVector index_vec; 466 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 467 CeedScalar *array; 468 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 469 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 470 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 471 CeedVector elem_dof; 472 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 473 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 474 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 475 const CeedScalar *elem_dof_a; 476 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 477 CeedCall(CeedVectorDestroy(&index_vec)); 478 479 // Determine i, j locations for element matrices 480 CeedInt count = 0; 481 for (CeedInt e = 0; e < num_elem; e++) { 482 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 483 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 484 for (CeedInt i = 0; i < elem_size; i++) { 485 for (CeedInt j = 0; j < elem_size; j++) { 486 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 487 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 488 489 const CeedInt row = elem_dof_a[elem_dof_index_row]; 490 const CeedInt col = elem_dof_a[elem_dof_index_col]; 491 492 rows[offset + count] = row; 493 cols[offset + count] = col; 494 count++; 495 } 496 } 497 } 498 } 499 } 500 if (count != local_num_entries) { 501 // LCOV_EXCL_START 502 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 503 // LCOV_EXCL_STOP 504 } 505 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 506 CeedCall(CeedVectorDestroy(&elem_dof)); 507 508 return CEED_ERROR_SUCCESS; 509 } 510 511 /** 512 @brief Assemble nonzero entries for non-composite operator 513 514 Users should generally use CeedOperatorLinearAssemble() 515 516 @param[in] op CeedOperator to assemble 517 @param[in] offset Offset for number of entries 518 @param[out] values Values to assemble into matrix 519 520 @return An error code: 0 - success, otherwise - failure 521 522 @ref Developer 523 **/ 524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 525 Ceed ceed; 526 bool is_composite; 527 CeedCall(CeedOperatorGetCeed(op, &ceed)); 528 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 529 530 if (is_composite) { 531 // LCOV_EXCL_START 532 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 533 // LCOV_EXCL_STOP 534 } 535 536 // Early exit for empty operator 537 { 538 CeedInt num_elem = 0; 539 540 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 541 if (num_elem == 0) return CEED_ERROR_SUCCESS; 542 } 543 544 if (op->LinearAssembleSingle) { 545 // Backend version 546 CeedCall(op->LinearAssembleSingle(op, offset, values)); 547 return CEED_ERROR_SUCCESS; 548 } else { 549 // Operator fallback 550 CeedOperator op_fallback; 551 552 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 553 if (op_fallback) { 554 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 555 return CEED_ERROR_SUCCESS; 556 } 557 } 558 559 // Assemble QFunction 560 CeedQFunction qf; 561 CeedCall(CeedOperatorGetQFunction(op, &qf)); 562 CeedVector assembled_qf; 563 CeedElemRestriction rstr_q; 564 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 565 CeedSize qf_length; 566 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 567 568 CeedInt num_input_fields, num_output_fields; 569 CeedOperatorField *input_fields; 570 CeedOperatorField *output_fields; 571 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 572 573 // Get assembly data 574 CeedOperatorAssemblyData data; 575 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 576 const CeedEvalMode **eval_modes_in, **eval_modes_out; 577 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 578 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 579 &eval_modes_out, NULL, NULL)); 580 CeedBasis *bases; 581 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 582 CeedBasis basis_in = bases[0]; 583 584 if (num_active_bases > 1) { 585 // LCOV_EXCL_START 586 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 587 // LCOV_EXCL_STOP 588 } 589 if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) { 590 // LCOV_EXCL_START 591 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 592 // LCOV_EXCL_STOP 593 } 594 595 CeedElemRestriction active_rstr; 596 CeedInt num_elem, elem_size, num_qpts, num_comp; 597 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 598 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 599 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 600 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 601 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 602 603 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 604 605 // loop over elements and put in data structure 606 const CeedScalar *interp_in, *grad_in; 607 CeedCall(CeedBasisGetInterp(basis_in, &interp_in)); 608 CeedCall(CeedBasisGetGrad(basis_in, &grad_in)); 609 610 const CeedScalar *assembled_qf_array; 611 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 612 613 CeedInt layout_qf[3]; 614 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 615 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 616 617 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 618 const CeedScalar **B_mats_in, **B_mats_out; 619 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 620 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 621 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 622 CeedScalar elem_mat[elem_size * elem_size]; 623 CeedInt count = 0; 624 CeedScalar *vals; 625 CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals)); 626 for (CeedInt e = 0; e < num_elem; e++) { 627 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 628 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 629 // Compute B^T*D 630 for (CeedInt n = 0; n < elem_size; n++) { 631 for (CeedInt q = 0; q < num_qpts; q++) { 632 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 633 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 634 CeedScalar sum = 0.0; 635 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 636 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 637 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 638 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 639 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 640 } 641 BTD_mat[btd_index] = sum; 642 } 643 } 644 } 645 // form element matrix itself (for each block component) 646 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 647 648 // put element matrix in coordinate data structure 649 for (CeedInt i = 0; i < elem_size; i++) { 650 for (CeedInt j = 0; j < elem_size; j++) { 651 vals[offset + count] = elem_mat[i * elem_size + j]; 652 count++; 653 } 654 } 655 } 656 } 657 } 658 if (count != local_num_entries) { 659 // LCOV_EXCL_START 660 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 661 // LCOV_EXCL_STOP 662 } 663 CeedCall(CeedVectorRestoreArray(values, &vals)); 664 665 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 666 CeedCall(CeedVectorDestroy(&assembled_qf)); 667 668 return CEED_ERROR_SUCCESS; 669 } 670 671 /** 672 @brief Count number of entries for assembled CeedOperator 673 674 @param[in] op CeedOperator to assemble 675 @param[out] num_entries Number of entries in assembled representation 676 677 @return An error code: 0 - success, otherwise - failure 678 679 @ref Utility 680 **/ 681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 682 bool is_composite; 683 CeedElemRestriction rstr; 684 CeedInt num_elem, elem_size, num_comp; 685 686 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 687 if (is_composite) { 688 // LCOV_EXCL_START 689 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 690 // LCOV_EXCL_STOP 691 } 692 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 693 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 694 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 695 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 696 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 697 698 return CEED_ERROR_SUCCESS; 699 } 700 701 /** 702 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 703 704 @param[in] op_fine Fine grid operator 705 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 706 @param[in] rstr_coarse Coarse grid restriction 707 @param[in] basis_coarse Coarse grid active vector basis 708 @param[in] basis_c_to_f Basis for coarse to fine interpolation 709 @param[out] op_coarse Coarse grid operator 710 @param[out] op_prolong Coarse to fine operator 711 @param[out] op_restrict Fine to coarse operator 712 713 @return An error code: 0 - success, otherwise - failure 714 715 @ref Developer 716 **/ 717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 718 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 719 Ceed ceed; 720 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 721 722 // Check for composite operator 723 bool is_composite; 724 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 725 if (is_composite) { 726 // LCOV_EXCL_START 727 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 728 // LCOV_EXCL_STOP 729 } 730 731 // Coarse Grid 732 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 733 CeedElemRestriction rstr_fine = NULL; 734 // -- Clone input fields 735 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 736 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 737 rstr_fine = op_fine->input_fields[i]->elem_rstr; 738 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 739 } else { 740 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 741 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 742 } 743 } 744 // -- Clone output fields 745 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 746 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 747 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 748 } else { 749 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 750 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 751 } 752 } 753 // -- Clone QFunctionAssemblyData 754 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 755 756 // Multiplicity vector 757 CeedVector mult_vec, mult_e_vec; 758 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 759 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 760 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 761 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 762 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 763 CeedCall(CeedVectorDestroy(&mult_e_vec)); 764 CeedCall(CeedVectorReciprocal(mult_vec)); 765 766 // Restriction 767 CeedInt num_comp; 768 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 769 CeedQFunction qf_restrict; 770 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 771 CeedInt *num_comp_r_data; 772 CeedCall(CeedCalloc(1, &num_comp_r_data)); 773 num_comp_r_data[0] = num_comp; 774 CeedQFunctionContext ctx_r; 775 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 776 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 777 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 778 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 779 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 780 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 781 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 782 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 783 784 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 785 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 786 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 787 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 788 789 // Prolongation 790 CeedQFunction qf_prolong; 791 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 792 CeedInt *num_comp_p_data; 793 CeedCall(CeedCalloc(1, &num_comp_p_data)); 794 num_comp_p_data[0] = num_comp; 795 CeedQFunctionContext ctx_p; 796 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 797 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 798 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 799 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 800 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 801 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 802 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 803 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 804 805 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 806 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 807 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 808 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 809 810 // Clone name 811 bool has_name = op_fine->name; 812 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 813 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 814 { 815 char *prolongation_name; 816 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 817 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 818 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 819 CeedCall(CeedFree(&prolongation_name)); 820 } 821 { 822 char *restriction_name; 823 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 824 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 825 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 826 CeedCall(CeedFree(&restriction_name)); 827 } 828 829 // Check 830 CeedCall(CeedOperatorCheckReady(*op_coarse)); 831 CeedCall(CeedOperatorCheckReady(*op_prolong)); 832 CeedCall(CeedOperatorCheckReady(*op_restrict)); 833 834 // Cleanup 835 CeedCall(CeedVectorDestroy(&mult_vec)); 836 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 837 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 838 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 839 840 return CEED_ERROR_SUCCESS; 841 } 842 843 /** 844 @brief Build 1D mass matrix and Laplacian with perturbation 845 846 @param[in] interp_1d Interpolation matrix in one dimension 847 @param[in] grad_1d Gradient matrix in one dimension 848 @param[in] q_weight_1d Quadrature weights in one dimension 849 @param[in] P_1d Number of basis nodes in one dimension 850 @param[in] Q_1d Number of quadrature points in one dimension 851 @param[in] dim Dimension of basis 852 @param[out] mass Assembled mass matrix in one dimension 853 @param[out] laplace Assembled perturbed Laplacian in one dimension 854 855 @return An error code: 0 - success, otherwise - failure 856 857 @ref Developer 858 **/ 859 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, 860 CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 861 for (CeedInt i = 0; i < P_1d; i++) { 862 for (CeedInt j = 0; j < P_1d; j++) { 863 CeedScalar sum = 0.0; 864 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 865 mass[i + j * P_1d] = sum; 866 } 867 } 868 // -- Laplacian 869 for (CeedInt i = 0; i < P_1d; i++) { 870 for (CeedInt j = 0; j < P_1d; j++) { 871 CeedScalar sum = 0.0; 872 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 873 laplace[i + j * P_1d] = sum; 874 } 875 } 876 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 877 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 878 return CEED_ERROR_SUCCESS; 879 } 880 CeedPragmaOptimizeOn; 881 882 /// @} 883 884 /// ---------------------------------------------------------------------------- 885 /// CeedOperator Backend API 886 /// ---------------------------------------------------------------------------- 887 /// @addtogroup CeedOperatorBackend 888 /// @{ 889 890 /** 891 @brief Create object holding CeedQFunction assembly data for CeedOperator 892 893 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 894 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 895 896 @return An error code: 0 - success, otherwise - failure 897 898 @ref Backend 899 **/ 900 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 901 CeedCall(CeedCalloc(1, data)); 902 (*data)->ref_count = 1; 903 (*data)->ceed = ceed; 904 CeedCall(CeedReference(ceed)); 905 906 return CEED_ERROR_SUCCESS; 907 } 908 909 /** 910 @brief Increment the reference counter for a CeedQFunctionAssemblyData 911 912 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 913 914 @return An error code: 0 - success, otherwise - failure 915 916 @ref Backend 917 **/ 918 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 919 data->ref_count++; 920 return CEED_ERROR_SUCCESS; 921 } 922 923 /** 924 @brief Set re-use of CeedQFunctionAssemblyData 925 926 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 927 @param[in] reuse_data Boolean flag indicating data re-use 928 929 @return An error code: 0 - success, otherwise - failure 930 931 @ref Backend 932 **/ 933 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 934 data->reuse_data = reuse_data; 935 data->needs_data_update = true; 936 return CEED_ERROR_SUCCESS; 937 } 938 939 /** 940 @brief Mark QFunctionAssemblyData as stale 941 942 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 943 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 944 945 @return An error code: 0 - success, otherwise - failure 946 947 @ref Backend 948 **/ 949 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 950 data->needs_data_update = needs_data_update; 951 return CEED_ERROR_SUCCESS; 952 } 953 954 /** 955 @brief Determine if QFunctionAssemblyData needs update 956 957 @param[in] data CeedQFunctionAssemblyData to mark as stale 958 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 959 960 @return An error code: 0 - success, otherwise - failure 961 962 @ref Backend 963 **/ 964 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 965 *is_update_needed = !data->reuse_data || data->needs_data_update; 966 return CEED_ERROR_SUCCESS; 967 } 968 969 /** 970 @brief Copy the pointer to a CeedQFunctionAssemblyData. 971 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 972 Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData. 973 This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData. 974 975 @param[in] data CeedQFunctionAssemblyData to copy reference to 976 @param[in,out] data_copy Variable to store copied reference 977 978 @return An error code: 0 - success, otherwise - failure 979 980 @ref Backend 981 **/ 982 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 983 CeedCall(CeedQFunctionAssemblyDataReference(data)); 984 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 985 *data_copy = data; 986 return CEED_ERROR_SUCCESS; 987 } 988 989 /** 990 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 991 992 @param[in] data CeedQFunctionAssemblyData to retrieve status 993 @param[out] is_setup Boolean flag for setup status 994 995 @return An error code: 0 - success, otherwise - failure 996 997 @ref Backend 998 **/ 999 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1000 *is_setup = data->is_setup; 1001 return CEED_ERROR_SUCCESS; 1002 } 1003 1004 /** 1005 @brief Set internal objects for CeedQFunctionAssemblyData 1006 1007 @param[in,out] data CeedQFunctionAssemblyData to set objects 1008 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1009 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1010 1011 @return An error code: 0 - success, otherwise - failure 1012 1013 @ref Backend 1014 **/ 1015 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1016 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1017 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1018 1019 data->is_setup = true; 1020 return CEED_ERROR_SUCCESS; 1021 } 1022 1023 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1024 if (!data->is_setup) { 1025 // LCOV_EXCL_START 1026 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1027 // LCOV_EXCL_STOP 1028 } 1029 1030 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1031 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1032 1033 return CEED_ERROR_SUCCESS; 1034 } 1035 1036 /** 1037 @brief Destroy CeedQFunctionAssemblyData 1038 1039 @param[in,out] data CeedQFunctionAssemblyData to destroy 1040 1041 @return An error code: 0 - success, otherwise - failure 1042 1043 @ref Backend 1044 **/ 1045 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1046 if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS; 1047 1048 CeedCall(CeedDestroy(&(*data)->ceed)); 1049 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1050 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1051 1052 CeedCall(CeedFree(data)); 1053 return CEED_ERROR_SUCCESS; 1054 } 1055 1056 /** 1057 @brief Get CeedOperatorAssemblyData 1058 1059 @param[in] op CeedOperator to assemble 1060 @param[out] data CeedQFunctionAssemblyData 1061 1062 @return An error code: 0 - success, otherwise - failure 1063 1064 @ref Backend 1065 **/ 1066 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1067 if (!op->op_assembled) { 1068 CeedOperatorAssemblyData data; 1069 1070 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1071 op->op_assembled = data; 1072 } 1073 *data = op->op_assembled; 1074 1075 return CEED_ERROR_SUCCESS; 1076 } 1077 1078 /** 1079 @brief Create object holding CeedOperator assembly data. 1080 1081 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1082 An array with references to the corresponding active CeedElemRestrictions is also stored. 1083 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1084 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1085 CeedEvalMode. 1086 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1087 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1088 1089 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1090 @param[in] op CeedOperator to be assembled 1091 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1092 1093 @return An error code: 0 - success, otherwise - failure 1094 1095 @ref Backend 1096 **/ 1097 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1098 CeedInt num_active_bases = 0; 1099 1100 // Allocate 1101 CeedCall(CeedCalloc(1, data)); 1102 (*data)->ceed = ceed; 1103 CeedCall(CeedReference(ceed)); 1104 1105 // Build OperatorAssembly data 1106 CeedQFunction qf; 1107 CeedQFunctionField *qf_fields; 1108 CeedOperatorField *op_fields; 1109 CeedInt num_input_fields; 1110 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1111 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1112 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1113 1114 // Determine active input basis 1115 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1116 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1117 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1118 1119 for (CeedInt i = 0; i < num_input_fields; i++) { 1120 CeedVector vec; 1121 1122 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1123 if (vec == CEED_VECTOR_ACTIVE) { 1124 CeedInt index = -1, dim = 1, num_components; 1125 CeedBasis basis_in = NULL; 1126 CeedEvalMode eval_mode; 1127 1128 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1129 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1130 CeedCall(CeedBasisGetNumComponents(basis_in, &num_components)); 1131 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1132 for (CeedInt i = 0; i < num_active_bases; i++) { 1133 if ((*data)->active_bases[i] == basis_in) index = i; 1134 } 1135 if (index == -1) { 1136 CeedElemRestriction elem_rstr_in; 1137 1138 index = num_active_bases; 1139 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1140 (*data)->active_bases[num_active_bases] = NULL; 1141 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1142 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1143 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1144 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1145 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1146 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1147 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1148 num_eval_modes_in[index] = 0; 1149 num_eval_modes_out[index] = 0; 1150 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1151 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1152 eval_modes_in[index] = NULL; 1153 eval_modes_out[index] = NULL; 1154 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1155 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1156 eval_mode_offsets_in[index] = NULL; 1157 eval_mode_offsets_out[index] = NULL; 1158 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1159 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1160 (*data)->assembled_bases_in[index] = NULL; 1161 (*data)->assembled_bases_out[index] = NULL; 1162 num_active_bases++; 1163 } 1164 switch (eval_mode) { 1165 case CEED_EVAL_NONE: 1166 case CEED_EVAL_INTERP: 1167 CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index])); 1168 CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index])); 1169 eval_modes_in[index][num_eval_modes_in[index]] = eval_mode; 1170 eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset; 1171 offset += num_components; 1172 num_eval_modes_in[index] += 1; 1173 break; 1174 case CEED_EVAL_GRAD: 1175 CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index])); 1176 CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index])); 1177 for (CeedInt d = 0; d < dim; d++) { 1178 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1179 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1180 offset += num_components; 1181 } 1182 num_eval_modes_in[index] += dim; 1183 break; 1184 case CEED_EVAL_WEIGHT: 1185 case CEED_EVAL_DIV: 1186 case CEED_EVAL_CURL: 1187 break; // Caught by QF Assembly 1188 } 1189 } 1190 } 1191 (*data)->num_eval_modes_in = num_eval_modes_in; 1192 (*data)->eval_modes_in = eval_modes_in; 1193 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1194 1195 // Determine active output basis 1196 CeedInt num_output_fields; 1197 1198 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1199 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1200 offset = 0; 1201 for (CeedInt i = 0; i < num_output_fields; i++) { 1202 CeedVector vec; 1203 1204 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1205 if (vec == CEED_VECTOR_ACTIVE) { 1206 CeedInt index = -1, dim = 1, num_components; 1207 CeedBasis basis_out = NULL; 1208 CeedEvalMode eval_mode; 1209 1210 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1211 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1212 CeedCall(CeedBasisGetNumComponents(basis_out, &num_components)); 1213 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1214 for (CeedInt i = 0; i < num_active_bases; i++) { 1215 if ((*data)->active_bases[i] == basis_out) index = i; 1216 } 1217 if (index == -1) { 1218 CeedElemRestriction elem_rstr_out; 1219 1220 index = num_active_bases; 1221 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1222 (*data)->active_bases[num_active_bases] = NULL; 1223 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1224 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1225 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1226 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1227 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1228 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1229 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1230 num_eval_modes_in[index] = 0; 1231 num_eval_modes_out[index] = 0; 1232 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1233 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1234 eval_modes_in[index] = NULL; 1235 eval_modes_out[index] = NULL; 1236 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1237 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1238 eval_mode_offsets_in[index] = NULL; 1239 eval_mode_offsets_out[index] = NULL; 1240 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1241 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1242 (*data)->assembled_bases_in[index] = NULL; 1243 (*data)->assembled_bases_out[index] = NULL; 1244 num_active_bases++; 1245 } 1246 switch (eval_mode) { 1247 case CEED_EVAL_NONE: 1248 case CEED_EVAL_INTERP: 1249 CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index])); 1250 CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index])); 1251 eval_modes_out[index][num_eval_modes_out[index]] = eval_mode; 1252 eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset; 1253 offset += num_components; 1254 num_eval_modes_out[index] += 1; 1255 break; 1256 case CEED_EVAL_GRAD: 1257 CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index])); 1258 CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index])); 1259 for (CeedInt d = 0; d < dim; d++) { 1260 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1261 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1262 offset += num_components; 1263 } 1264 num_eval_modes_out[index] += dim; 1265 break; 1266 case CEED_EVAL_WEIGHT: 1267 case CEED_EVAL_DIV: 1268 case CEED_EVAL_CURL: 1269 break; // Caught by QF Assembly 1270 } 1271 } 1272 } 1273 (*data)->num_output_components = offset; 1274 (*data)->num_eval_modes_out = num_eval_modes_out; 1275 (*data)->eval_modes_out = eval_modes_out; 1276 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1277 (*data)->num_active_bases = num_active_bases; 1278 1279 return CEED_ERROR_SUCCESS; 1280 } 1281 1282 /** 1283 @brief Get CeedOperator CeedEvalModes for assembly. 1284 1285 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1286 1287 @param[in] data CeedOperatorAssemblyData 1288 @param[out] num_active_bases Total number of active bases 1289 @param[out] num_eval_mode_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1290 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1291 @param[out] eval_mode_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1292 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1293 @param[out] num_eval_mode_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1294 @param[out] eval_mode_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1295 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1296 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1297 including contributions of all active bases 1298 1299 @return An error code: 0 - success, otherwise - failure 1300 1301 @ref Backend 1302 **/ 1303 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1304 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1305 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1306 if (num_active_bases) *num_active_bases = data->num_active_bases; 1307 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1308 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1309 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1310 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1311 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1312 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1313 if (num_output_components) *num_output_components = data->num_output_components; 1314 1315 return CEED_ERROR_SUCCESS; 1316 } 1317 1318 /** 1319 @brief Get CeedOperator CeedBasis data for assembly. 1320 1321 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1322 1323 @param[in] data CeedOperatorAssemblyData 1324 @param[out] num_active_bases Number of active bases, or NULL 1325 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1326 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1327 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1328 1329 @return An error code: 0 - success, otherwise - failure 1330 1331 @ref Backend 1332 **/ 1333 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1334 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1335 // Assemble B_in, B_out if needed 1336 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1337 CeedInt num_qpts; 1338 1339 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1340 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1341 CeedInt elem_size; 1342 CeedScalar *B_in = NULL, *identity = NULL; 1343 const CeedScalar *interp_in, *grad_in; 1344 bool has_eval_none = false; 1345 1346 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size)); 1347 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in)); 1348 1349 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1350 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1351 } 1352 if (has_eval_none) { 1353 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1354 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1355 identity[i * elem_size + i] = 1.0; 1356 } 1357 } 1358 CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in)); 1359 CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in)); 1360 1361 for (CeedInt q = 0; q < num_qpts; q++) { 1362 for (CeedInt n = 0; n < elem_size; n++) { 1363 CeedInt d_in = -1; 1364 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1365 const CeedInt qq = data->num_eval_modes_in[b] * q; 1366 const CeedScalar *B = NULL; 1367 1368 if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++; 1369 CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B); 1370 B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n]; 1371 } 1372 } 1373 } 1374 if (identity) CeedCall(CeedFree(identity)); 1375 data->assembled_bases_in[b] = B_in; 1376 } 1377 } 1378 1379 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1380 CeedInt num_qpts; 1381 1382 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1383 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1384 CeedInt elem_size; 1385 const CeedScalar *interp_out, *grad_out; 1386 bool has_eval_none = false; 1387 CeedScalar *B_out = NULL, *identity = NULL; 1388 1389 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size)); 1390 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out)); 1391 1392 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1393 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1394 } 1395 if (has_eval_none) { 1396 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1397 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1398 identity[i * elem_size + i] = 1.0; 1399 } 1400 } 1401 CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out)); 1402 CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out)); 1403 1404 for (CeedInt q = 0; q < num_qpts; q++) { 1405 for (CeedInt n = 0; n < elem_size; n++) { 1406 CeedInt d_out = -1; 1407 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1408 const CeedInt qq = data->num_eval_modes_out[b] * q; 1409 const CeedScalar *B = NULL; 1410 1411 if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++; 1412 CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B); 1413 B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n]; 1414 } 1415 } 1416 } 1417 if (identity) CeedCall(CeedFree(identity)); 1418 data->assembled_bases_out[b] = B_out; 1419 } 1420 } 1421 1422 // Pass out assembled data 1423 if (active_bases) *active_bases = data->active_bases; 1424 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1425 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1426 1427 return CEED_ERROR_SUCCESS; 1428 } 1429 1430 /** 1431 @brief Get CeedOperator CeedBasis data for assembly. 1432 1433 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1434 1435 @param[in] data CeedOperatorAssemblyData 1436 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1437 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1438 1439 @return An error code: 0 - success, otherwise - failure 1440 1441 @ref Backend 1442 **/ 1443 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1444 CeedElemRestriction **active_elem_rstrs) { 1445 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1446 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1447 1448 return CEED_ERROR_SUCCESS; 1449 } 1450 1451 /** 1452 @brief Destroy CeedOperatorAssemblyData 1453 1454 @param[in,out] data CeedOperatorAssemblyData to destroy 1455 1456 @return An error code: 0 - success, otherwise - failure 1457 1458 @ref Backend 1459 **/ 1460 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1461 if (!*data) return CEED_ERROR_SUCCESS; 1462 1463 CeedCall(CeedDestroy(&(*data)->ceed)); 1464 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1465 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1466 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1467 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1468 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1469 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1470 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1471 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1472 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1473 } 1474 CeedCall(CeedFree(&(*data)->active_bases)); 1475 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1476 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1477 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1478 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1479 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1480 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1481 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1482 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1483 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1484 1485 CeedCall(CeedFree(data)); 1486 return CEED_ERROR_SUCCESS; 1487 } 1488 1489 /// @} 1490 1491 /// ---------------------------------------------------------------------------- 1492 /// CeedOperator Public API 1493 /// ---------------------------------------------------------------------------- 1494 /// @addtogroup CeedOperatorUser 1495 /// @{ 1496 1497 /** 1498 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1499 1500 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1501 The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices 1502 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the 1503 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, 1504 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] 1505 and producing the output [dv_0, dv_1, v]. 1506 1507 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1508 1509 @param[in] op CeedOperator to assemble CeedQFunction 1510 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1511 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1512 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1513 1514 @return An error code: 0 - success, otherwise - failure 1515 1516 @ref User 1517 **/ 1518 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1519 CeedCall(CeedOperatorCheckReady(op)); 1520 1521 if (op->LinearAssembleQFunction) { 1522 // Backend version 1523 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1524 } else { 1525 // Operator fallback 1526 CeedOperator op_fallback; 1527 1528 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1529 if (op_fallback) { 1530 CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1531 } else { 1532 // LCOV_EXCL_START 1533 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1534 // LCOV_EXCL_STOP 1535 } 1536 } 1537 return CEED_ERROR_SUCCESS; 1538 } 1539 1540 /** 1541 @brief Assemble CeedQFunction and store result internally. 1542 Return copied references of stored data to the caller. 1543 Caller is responsible for ownership and destruction of the copied references. 1544 See also @ref CeedOperatorLinearAssembleQFunction 1545 1546 @param[in] op CeedOperator to assemble CeedQFunction 1547 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1548 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1549 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1550 1551 @return An error code: 0 - success, otherwise - failure 1552 1553 @ref User 1554 **/ 1555 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1556 CeedCall(CeedOperatorCheckReady(op)); 1557 1558 if (op->LinearAssembleQFunctionUpdate) { 1559 // Backend version 1560 bool qf_assembled_is_setup; 1561 CeedVector assembled_vec = NULL; 1562 CeedElemRestriction assembled_rstr = NULL; 1563 1564 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1565 if (qf_assembled_is_setup) { 1566 bool update_needed; 1567 1568 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1569 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1570 if (update_needed) { 1571 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1572 } 1573 } else { 1574 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1575 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1576 } 1577 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1578 1579 // Copy reference from internally held copy 1580 *assembled = NULL; 1581 *rstr = NULL; 1582 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1583 CeedCall(CeedVectorDestroy(&assembled_vec)); 1584 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1585 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1586 } else { 1587 // Operator fallback 1588 CeedOperator op_fallback; 1589 1590 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1591 if (op_fallback) { 1592 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1593 } else { 1594 // LCOV_EXCL_START 1595 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1596 // LCOV_EXCL_STOP 1597 } 1598 } 1599 1600 return CEED_ERROR_SUCCESS; 1601 } 1602 1603 /** 1604 @brief Assemble the diagonal of a square linear CeedOperator 1605 1606 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1607 1608 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1609 1610 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1611 1612 @param[in] op CeedOperator to assemble CeedQFunction 1613 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1614 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1615 1616 @return An error code: 0 - success, otherwise - failure 1617 1618 @ref User 1619 **/ 1620 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1621 bool is_composite; 1622 CeedCall(CeedOperatorCheckReady(op)); 1623 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1624 1625 CeedSize input_size = 0, output_size = 0; 1626 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1627 if (input_size != output_size) { 1628 // LCOV_EXCL_START 1629 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1630 // LCOV_EXCL_STOP 1631 } 1632 1633 // Early exit for empty operator 1634 if (!is_composite) { 1635 CeedInt num_elem = 0; 1636 1637 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1638 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1639 } 1640 1641 if (op->LinearAssembleDiagonal) { 1642 // Backend version 1643 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1644 return CEED_ERROR_SUCCESS; 1645 } else if (op->LinearAssembleAddDiagonal) { 1646 // Backend version with zeroing first 1647 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1648 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1649 return CEED_ERROR_SUCCESS; 1650 } else { 1651 // Operator fallback 1652 CeedOperator op_fallback; 1653 1654 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1655 if (op_fallback) { 1656 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1657 return CEED_ERROR_SUCCESS; 1658 } 1659 } 1660 // Default interface implementation 1661 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1662 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1663 1664 return CEED_ERROR_SUCCESS; 1665 } 1666 1667 /** 1668 @brief Assemble the diagonal of a square linear CeedOperator 1669 1670 This sums into a CeedVector the diagonal of a linear CeedOperator. 1671 1672 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1673 1674 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1675 1676 @param[in] op CeedOperator to assemble CeedQFunction 1677 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1678 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1679 1680 @return An error code: 0 - success, otherwise - failure 1681 1682 @ref User 1683 **/ 1684 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1685 bool is_composite; 1686 CeedCall(CeedOperatorCheckReady(op)); 1687 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1688 1689 CeedSize input_size = 0, output_size = 0; 1690 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1691 if (input_size != output_size) { 1692 // LCOV_EXCL_START 1693 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1694 // LCOV_EXCL_STOP 1695 } 1696 1697 // Early exit for empty operator 1698 if (!is_composite) { 1699 CeedInt num_elem = 0; 1700 1701 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1702 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1703 } 1704 1705 if (op->LinearAssembleAddDiagonal) { 1706 // Backend version 1707 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1708 return CEED_ERROR_SUCCESS; 1709 } else { 1710 // Operator fallback 1711 CeedOperator op_fallback; 1712 1713 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1714 if (op_fallback) { 1715 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1716 return CEED_ERROR_SUCCESS; 1717 } 1718 } 1719 // Default interface implementation 1720 if (is_composite) { 1721 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1722 } else { 1723 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1724 } 1725 1726 return CEED_ERROR_SUCCESS; 1727 } 1728 1729 /** 1730 @brief Assemble the point block diagonal of a square linear CeedOperator 1731 1732 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1733 1734 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1735 1736 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1737 1738 @param[in] op CeedOperator to assemble CeedQFunction 1739 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1740 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1741 component in]. 1742 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1743 1744 @return An error code: 0 - success, otherwise - failure 1745 1746 @ref User 1747 **/ 1748 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1749 bool is_composite; 1750 CeedCall(CeedOperatorCheckReady(op)); 1751 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1752 1753 CeedSize input_size = 0, output_size = 0; 1754 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1755 if (input_size != output_size) { 1756 // LCOV_EXCL_START 1757 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1758 // LCOV_EXCL_STOP 1759 } 1760 1761 // Early exit for empty operator 1762 if (!is_composite) { 1763 CeedInt num_elem = 0; 1764 1765 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1766 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1767 } 1768 1769 if (op->LinearAssemblePointBlockDiagonal) { 1770 // Backend version 1771 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1772 return CEED_ERROR_SUCCESS; 1773 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1774 // Backend version with zeroing first 1775 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1776 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1777 return CEED_ERROR_SUCCESS; 1778 } else { 1779 // Operator fallback 1780 CeedOperator op_fallback; 1781 1782 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1783 if (op_fallback) { 1784 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1785 return CEED_ERROR_SUCCESS; 1786 } 1787 } 1788 // Default interface implementation 1789 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1790 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1791 1792 return CEED_ERROR_SUCCESS; 1793 } 1794 1795 /** 1796 @brief Assemble the point block diagonal of a square linear CeedOperator 1797 1798 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1799 1800 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1801 1802 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1803 1804 @param[in] op CeedOperator to assemble CeedQFunction 1805 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1806 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1807 component in]. 1808 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1809 1810 @return An error code: 0 - success, otherwise - failure 1811 1812 @ref User 1813 **/ 1814 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1815 bool is_composite; 1816 CeedCall(CeedOperatorCheckReady(op)); 1817 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1818 1819 CeedSize input_size = 0, output_size = 0; 1820 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1821 if (input_size != output_size) { 1822 // LCOV_EXCL_START 1823 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1824 // LCOV_EXCL_STOP 1825 } 1826 1827 // Early exit for empty operator 1828 if (!is_composite) { 1829 CeedInt num_elem = 0; 1830 1831 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1832 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1833 } 1834 1835 if (op->LinearAssembleAddPointBlockDiagonal) { 1836 // Backend version 1837 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1838 return CEED_ERROR_SUCCESS; 1839 } else { 1840 // Operator fallback 1841 CeedOperator op_fallback; 1842 1843 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1844 if (op_fallback) { 1845 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1846 return CEED_ERROR_SUCCESS; 1847 } 1848 } 1849 // Default interface implementation 1850 if (is_composite) { 1851 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1852 } else { 1853 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1854 } 1855 1856 return CEED_ERROR_SUCCESS; 1857 } 1858 1859 /** 1860 @brief Fully assemble the nonzero pattern of a linear operator. 1861 1862 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1863 1864 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1865 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j) 1866 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1867 1868 This will generally be slow unless your operator is low-order. 1869 1870 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1871 1872 @param[in] op CeedOperator to assemble 1873 @param[out] num_entries Number of entries in coordinate nonzero pattern 1874 @param[out] rows Row number for each entry 1875 @param[out] cols Column number for each entry 1876 1877 @ref User 1878 **/ 1879 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1880 CeedInt num_suboperators, single_entries; 1881 CeedOperator *sub_operators; 1882 bool is_composite; 1883 CeedCall(CeedOperatorCheckReady(op)); 1884 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1885 1886 if (op->LinearAssembleSymbolic) { 1887 // Backend version 1888 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1889 return CEED_ERROR_SUCCESS; 1890 } else { 1891 // Operator fallback 1892 CeedOperator op_fallback; 1893 1894 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1895 if (op_fallback) { 1896 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1897 return CEED_ERROR_SUCCESS; 1898 } 1899 } 1900 1901 // Default interface implementation 1902 1903 // count entries and allocate rows, cols arrays 1904 *num_entries = 0; 1905 if (is_composite) { 1906 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1907 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1908 for (CeedInt k = 0; k < num_suboperators; ++k) { 1909 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1910 *num_entries += single_entries; 1911 } 1912 } else { 1913 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1914 *num_entries += single_entries; 1915 } 1916 CeedCall(CeedCalloc(*num_entries, rows)); 1917 CeedCall(CeedCalloc(*num_entries, cols)); 1918 1919 // assemble nonzero locations 1920 CeedInt offset = 0; 1921 if (is_composite) { 1922 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1923 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1924 for (CeedInt k = 0; k < num_suboperators; ++k) { 1925 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1926 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1927 offset += single_entries; 1928 } 1929 } else { 1930 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1931 } 1932 1933 return CEED_ERROR_SUCCESS; 1934 } 1935 1936 /** 1937 @brief Fully assemble the nonzero entries of a linear operator. 1938 1939 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1940 1941 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1942 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added, 1943 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1944 1945 This will generally be slow unless your operator is low-order. 1946 1947 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1948 1949 @param[in] op CeedOperator to assemble 1950 @param[out] values Values to assemble into matrix 1951 1952 @ref User 1953 **/ 1954 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1955 CeedInt num_suboperators, single_entries = 0; 1956 CeedOperator *sub_operators; 1957 bool is_composite; 1958 CeedCall(CeedOperatorCheckReady(op)); 1959 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1960 1961 // Early exit for empty operator 1962 if (!is_composite) { 1963 CeedInt num_elem = 0; 1964 1965 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1966 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1967 } 1968 1969 if (op->LinearAssemble) { 1970 // Backend version 1971 CeedCall(op->LinearAssemble(op, values)); 1972 return CEED_ERROR_SUCCESS; 1973 } else { 1974 // Operator fallback 1975 CeedOperator op_fallback; 1976 1977 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1978 if (op_fallback) { 1979 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 1980 return CEED_ERROR_SUCCESS; 1981 } 1982 } 1983 1984 // Default interface implementation 1985 CeedInt offset = 0; 1986 if (is_composite) { 1987 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1988 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1989 for (CeedInt k = 0; k < num_suboperators; k++) { 1990 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 1991 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1992 offset += single_entries; 1993 } 1994 } else { 1995 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 1996 } 1997 1998 return CEED_ERROR_SUCCESS; 1999 } 2000 2001 /** 2002 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2003 2004 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2005 2006 @param[in] op Composite CeedOperator 2007 @param[in] num_skip_indices Number of suboperators to skip 2008 @param[in] skip_indices Array of indices of suboperators to skip 2009 @param[out] mult Vector to store multiplicity (of size l_size) 2010 2011 @return An error code: 0 - success, otherwise - failure 2012 2013 @ref User 2014 **/ 2015 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2016 CeedCall(CeedOperatorCheckReady(op)); 2017 2018 Ceed ceed; 2019 CeedInt num_suboperators; 2020 CeedSize l_vec_len; 2021 CeedScalar *mult_array; 2022 CeedVector ones_l_vec; 2023 CeedElemRestriction elem_rstr; 2024 CeedOperator *sub_operators; 2025 2026 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2027 2028 // Zero mult vector 2029 CeedCall(CeedVectorSetValue(mult, 0.0)); 2030 2031 // Get suboperators 2032 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2033 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2034 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2035 2036 // Work vector 2037 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2038 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2039 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2040 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2041 2042 // Compute multiplicity across suboperators 2043 for (CeedInt i = 0; i < num_suboperators; i++) { 2044 const CeedScalar *sub_mult_array; 2045 CeedVector sub_mult_l_vec, ones_e_vec; 2046 2047 // -- Check for suboperator to skip 2048 for (CeedInt j = 0; j < num_skip_indices; j++) { 2049 if (skip_indices[j] == i) continue; 2050 } 2051 2052 // -- Sub operator multiplicity 2053 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2054 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2055 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2056 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2057 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2058 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2059 // ---- Flag every node present in the current suboperator 2060 for (CeedInt j = 0; j < l_vec_len; j++) { 2061 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2062 } 2063 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2064 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2065 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2066 } 2067 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2068 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2069 2070 return CEED_ERROR_SUCCESS; 2071 } 2072 2073 /** 2074 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2075 grid interpolation 2076 2077 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2078 2079 @param[in] op_fine Fine grid operator 2080 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2081 @param[in] rstr_coarse Coarse grid restriction 2082 @param[in] basis_coarse Coarse grid active vector basis 2083 @param[out] op_coarse Coarse grid operator 2084 @param[out] op_prolong Coarse to fine operator 2085 @param[out] op_restrict Fine to coarse operator 2086 2087 @return An error code: 0 - success, otherwise - failure 2088 2089 @ref User 2090 **/ 2091 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2092 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2093 CeedCall(CeedOperatorCheckReady(op_fine)); 2094 2095 // Build prolongation matrix 2096 CeedBasis basis_fine, basis_c_to_f; 2097 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2098 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2099 2100 // Core code 2101 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2102 2103 return CEED_ERROR_SUCCESS; 2104 } 2105 2106 /** 2107 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2108 2109 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2110 2111 @param[in] op_fine Fine grid operator 2112 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2113 @param[in] rstr_coarse Coarse grid restriction 2114 @param[in] basis_coarse Coarse grid active vector basis 2115 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2116 @param[out] op_coarse Coarse grid operator 2117 @param[out] op_prolong Coarse to fine operator 2118 @param[out] op_restrict Fine to coarse operator 2119 2120 @return An error code: 0 - success, otherwise - failure 2121 2122 @ref User 2123 **/ 2124 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2125 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2126 CeedOperator *op_restrict) { 2127 CeedCall(CeedOperatorCheckReady(op_fine)); 2128 Ceed ceed; 2129 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2130 2131 // Check for compatible quadrature spaces 2132 CeedBasis basis_fine; 2133 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2134 CeedInt Q_f, Q_c; 2135 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2136 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2137 if (Q_f != Q_c) { 2138 // LCOV_EXCL_START 2139 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2140 // LCOV_EXCL_STOP 2141 } 2142 2143 // Coarse to fine basis 2144 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2145 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2146 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2147 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2148 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2149 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2150 CeedScalar *q_ref, *q_weight, *grad; 2151 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2152 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2153 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2154 CeedBasis basis_c_to_f; 2155 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2156 CeedCall(CeedFree(&q_ref)); 2157 CeedCall(CeedFree(&q_weight)); 2158 CeedCall(CeedFree(&grad)); 2159 2160 // Core code 2161 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2162 return CEED_ERROR_SUCCESS; 2163 } 2164 2165 /** 2166 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2167 2168 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2169 2170 @param[in] op_fine Fine grid operator 2171 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2172 @param[in] rstr_coarse Coarse grid restriction 2173 @param[in] basis_coarse Coarse grid active vector basis 2174 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2175 @param[out] op_coarse Coarse grid operator 2176 @param[out] op_prolong Coarse to fine operator 2177 @param[out] op_restrict Fine to coarse operator 2178 2179 @return An error code: 0 - success, otherwise - failure 2180 2181 @ref User 2182 **/ 2183 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2184 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2185 CeedOperator *op_restrict) { 2186 CeedCall(CeedOperatorCheckReady(op_fine)); 2187 Ceed ceed; 2188 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2189 2190 // Check for compatible quadrature spaces 2191 CeedBasis basis_fine; 2192 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2193 CeedInt Q_f, Q_c; 2194 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2195 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2196 if (Q_f != Q_c) { 2197 // LCOV_EXCL_START 2198 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2199 // LCOV_EXCL_STOP 2200 } 2201 2202 // Coarse to fine basis 2203 CeedElemTopology topo; 2204 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2205 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2206 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2207 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2208 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2209 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2210 CeedScalar *q_ref, *q_weight, *grad; 2211 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2212 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2213 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2214 CeedBasis basis_c_to_f; 2215 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2216 CeedCall(CeedFree(&q_ref)); 2217 CeedCall(CeedFree(&q_weight)); 2218 CeedCall(CeedFree(&grad)); 2219 2220 // Core code 2221 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2222 return CEED_ERROR_SUCCESS; 2223 } 2224 2225 /** 2226 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2227 2228 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2229 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V. 2230 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T 2231 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear. 2232 2233 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2234 2235 @param[in] op CeedOperator to create element inverses 2236 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2237 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2238 2239 @return An error code: 0 - success, otherwise - failure 2240 2241 @ref User 2242 **/ 2243 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2244 CeedCall(CeedOperatorCheckReady(op)); 2245 2246 if (op->CreateFDMElementInverse) { 2247 // Backend version 2248 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2249 return CEED_ERROR_SUCCESS; 2250 } else { 2251 // Operator fallback 2252 CeedOperator op_fallback; 2253 2254 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2255 if (op_fallback) { 2256 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2257 return CEED_ERROR_SUCCESS; 2258 } 2259 } 2260 2261 // Default interface implementation 2262 Ceed ceed, ceed_parent; 2263 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2264 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2265 ceed_parent = ceed_parent ? ceed_parent : ceed; 2266 CeedQFunction qf; 2267 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2268 2269 // Determine active input basis 2270 bool interp = false, grad = false; 2271 CeedBasis basis = NULL; 2272 CeedElemRestriction rstr = NULL; 2273 CeedOperatorField *op_fields; 2274 CeedQFunctionField *qf_fields; 2275 CeedInt num_input_fields; 2276 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2277 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2278 for (CeedInt i = 0; i < num_input_fields; i++) { 2279 CeedVector vec; 2280 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2281 if (vec == CEED_VECTOR_ACTIVE) { 2282 CeedEvalMode eval_mode; 2283 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2284 interp = interp || eval_mode == CEED_EVAL_INTERP; 2285 grad = grad || eval_mode == CEED_EVAL_GRAD; 2286 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2287 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2288 } 2289 } 2290 if (!basis) { 2291 // LCOV_EXCL_START 2292 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2293 // LCOV_EXCL_STOP 2294 } 2295 CeedSize l_size = 1; 2296 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2297 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2298 CeedCall(CeedBasisGetNumNodes(basis, &elem_size)); 2299 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2300 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2301 CeedCall(CeedBasisGetDimension(basis, &dim)); 2302 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2303 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2304 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2305 2306 // Build and diagonalize 1D Mass and Laplacian 2307 bool tensor_basis; 2308 CeedCall(CeedBasisIsTensor(basis, &tensor_basis)); 2309 if (!tensor_basis) { 2310 // LCOV_EXCL_START 2311 return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2312 // LCOV_EXCL_STOP 2313 } 2314 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2315 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2316 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2317 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2318 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2319 CeedCall(CeedCalloc(P_1d, &lambda)); 2320 // -- Build matrices 2321 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2322 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2323 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2324 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2325 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2326 2327 // -- Diagonalize 2328 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2329 CeedCall(CeedFree(&mass)); 2330 CeedCall(CeedFree(&laplace)); 2331 for (CeedInt i = 0; i < P_1d; i++) { 2332 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2333 } 2334 CeedCall(CeedFree(&x)); 2335 2336 // Assemble QFunction 2337 CeedVector assembled; 2338 CeedElemRestriction rstr_qf; 2339 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2340 CeedInt layout[3]; 2341 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2342 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2343 CeedScalar max_norm = 0; 2344 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2345 2346 // Calculate element averages 2347 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2348 CeedScalar *elem_avg; 2349 const CeedScalar *assembled_array, *q_weight_array; 2350 CeedVector q_weight; 2351 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2352 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2353 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2354 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2355 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2356 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2357 for (CeedInt e = 0; e < num_elem; e++) { 2358 CeedInt count = 0; 2359 for (CeedInt q = 0; q < num_qpts; q++) { 2360 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2361 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2362 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2363 count++; 2364 } 2365 } 2366 } 2367 if (count) { 2368 elem_avg[e] /= count; 2369 } else { 2370 elem_avg[e] = 1.0; 2371 } 2372 } 2373 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2374 CeedCall(CeedVectorDestroy(&assembled)); 2375 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2376 CeedCall(CeedVectorDestroy(&q_weight)); 2377 2378 // Build FDM diagonal 2379 CeedVector q_data; 2380 CeedScalar *q_data_array, *fdm_diagonal; 2381 CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal)); 2382 const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON; 2383 for (CeedInt c = 0; c < num_comp; c++) { 2384 for (CeedInt n = 0; n < elem_size; n++) { 2385 if (interp) fdm_diagonal[c * elem_size + n] = 1.0; 2386 if (grad) { 2387 for (CeedInt d = 0; d < dim; d++) { 2388 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2389 fdm_diagonal[c * elem_size + n] += lambda[i]; 2390 } 2391 } 2392 if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound; 2393 } 2394 } 2395 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data)); 2396 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2397 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2398 for (CeedInt e = 0; e < num_elem; e++) { 2399 for (CeedInt c = 0; c < num_comp; c++) { 2400 for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]); 2401 } 2402 } 2403 CeedCall(CeedFree(&elem_avg)); 2404 CeedCall(CeedFree(&fdm_diagonal)); 2405 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2406 2407 // Setup FDM operator 2408 // -- Basis 2409 CeedBasis fdm_basis; 2410 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2411 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2412 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2413 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2414 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2415 CeedCall(CeedFree(&fdm_interp)); 2416 CeedCall(CeedFree(&grad_dummy)); 2417 CeedCall(CeedFree(&q_ref_dummy)); 2418 CeedCall(CeedFree(&q_weight_dummy)); 2419 CeedCall(CeedFree(&lambda)); 2420 2421 // -- Restriction 2422 CeedElemRestriction rstr_qd_i; 2423 CeedInt strides[3] = {1, elem_size, elem_size * num_comp}; 2424 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i)); 2425 // -- QFunction 2426 CeedQFunction qf_fdm; 2427 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2428 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2429 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2430 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2431 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2432 // -- QFunction context 2433 CeedInt *num_comp_data; 2434 CeedCall(CeedCalloc(1, &num_comp_data)); 2435 num_comp_data[0] = num_comp; 2436 CeedQFunctionContext ctx_fdm; 2437 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2438 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2439 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2440 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2441 // -- Operator 2442 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2443 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2444 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2445 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2446 2447 // Cleanup 2448 CeedCall(CeedVectorDestroy(&q_data)); 2449 CeedCall(CeedBasisDestroy(&fdm_basis)); 2450 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2451 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2452 2453 return CEED_ERROR_SUCCESS; 2454 } 2455 2456 /// @} 2457