1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <assert.h> 9 #include <ceed-impl.h> 10 #include <ceed/backend.h> 11 #include <ceed/ceed.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] eval_mode Current basis evaluation mode 184 @param[in] identity Pointer to identity matrix 185 @param[in] interp Pointer to interpolation matrix 186 @param[in] grad Pointer to gradient matrix 187 @param[out] basis_ptr Basis pointer to set 188 189 @ref Developer 190 **/ 191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad, 192 const CeedScalar **basis_ptr) { 193 switch (eval_mode) { 194 case CEED_EVAL_NONE: 195 *basis_ptr = identity; 196 break; 197 case CEED_EVAL_INTERP: 198 *basis_ptr = interp; 199 break; 200 case CEED_EVAL_GRAD: 201 *basis_ptr = grad; 202 break; 203 case CEED_EVAL_WEIGHT: 204 case CEED_EVAL_DIV: 205 case CEED_EVAL_CURL: 206 break; // Caught by QF Assembly 207 } 208 assert(*basis_ptr != NULL); 209 } 210 211 /** 212 @brief Create point block restriction for active operator field 213 214 @param[in] rstr Original CeedElemRestriction for active field 215 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 216 217 @return An error code: 0 - success, otherwise - failure 218 219 @ref Developer 220 **/ 221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 222 Ceed ceed; 223 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 224 const CeedInt *offsets; 225 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 226 227 // Expand offsets 228 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 229 CeedSize l_size; 230 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 231 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 232 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 233 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 234 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 235 CeedInt shift = num_comp; 236 if (comp_stride != 1) shift *= num_comp; 237 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 238 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 239 pointblock_offsets[i] = offsets[i] * shift; 240 } 241 242 // Create new restriction 243 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 244 pointblock_offsets, pointblock_rstr)); 245 246 // Cleanup 247 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 248 249 return CEED_ERROR_SUCCESS; 250 } 251 252 /** 253 @brief Core logic for assembling operator diagonal or point block diagonal 254 255 @param[in] op CeedOperator to assemble point block diagonal 256 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 257 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 258 @param[out] assembled CeedVector to store assembled diagonal 259 260 @return An error code: 0 - success, otherwise - failure 261 262 @ref Developer 263 **/ 264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 265 Ceed ceed; 266 CeedCall(CeedOperatorGetCeed(op, &ceed)); 267 268 // Assemble QFunction 269 CeedQFunction qf; 270 const CeedScalar *assembled_qf_array; 271 CeedVector assembled_qf; 272 CeedElemRestriction assembled_elem_rstr; 273 CeedInt num_input_fields, num_output_fields; 274 CeedInt layout[3]; 275 276 CeedCall(CeedOperatorGetQFunction(op, &qf)); 277 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 278 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 279 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 280 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 281 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 282 283 // Get assembly data 284 CeedOperatorAssemblyData data; 285 const CeedEvalMode **eval_modes_in, **eval_modes_out; 286 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 287 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 288 CeedBasis *active_bases; 289 CeedElemRestriction *active_elem_rstrs; 290 291 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 292 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 293 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 294 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 295 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 296 297 // Loop over all active bases 298 for (CeedInt b = 0; b < num_active_bases; b++) { 299 // Assemble point block diagonal restriction, if needed 300 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 301 302 if (is_pointblock) { 303 CeedElemRestriction point_block_elem_rstr; 304 305 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 306 diag_elem_rstr = point_block_elem_rstr; 307 } 308 309 // Create diagonal vector 310 CeedVector elem_diag; 311 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array; 315 CeedInt num_elem, num_nodes, num_qpts, num_components; 316 317 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 318 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 319 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 320 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 321 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 322 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 323 324 // Basis matrices 325 const CeedScalar *interp, *grad; 326 CeedScalar *identity = NULL; 327 bool has_eval_none = false; 328 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 329 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 330 } 331 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 332 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 333 } 334 if (has_eval_none) { 335 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 336 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 337 } 338 CeedCall(CeedBasisGetInterp(active_bases[b], &interp)); 339 CeedCall(CeedBasisGetGrad(active_bases[b], &grad)); 340 // Compute the diagonal of B^T D B 341 // Each element 342 for (CeedInt e = 0; e < num_elem; e++) { 343 CeedInt d_out = -1; 344 // Each basis eval mode pair 345 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 346 const CeedScalar *B_t = NULL; 347 if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1; 348 CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t); 349 CeedInt d_in = -1; 350 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 351 const CeedScalar *B = NULL; 352 if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1; 353 CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B); 354 // Each component 355 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 356 // Each qpt/node pair 357 for (CeedInt q = 0; q < num_qpts; q++) { 358 if (is_pointblock) { 359 // Point Block Diagonal 360 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 361 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 362 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 363 for (CeedInt n = 0; n < num_nodes; n++) { 364 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 365 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 366 } 367 } 368 } else { 369 // Diagonal Only 370 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 371 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 372 for (CeedInt n = 0; n < num_nodes; n++) { 373 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 374 } 375 } 376 } 377 } 378 } 379 } 380 } 381 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 382 383 // Assemble local operator diagonal 384 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 385 386 // Cleanup 387 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 388 CeedCall(CeedVectorDestroy(&elem_diag)); 389 CeedCall(CeedFree(&identity)); 390 } 391 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 392 CeedCall(CeedVectorDestroy(&assembled_qf)); 393 394 return CEED_ERROR_SUCCESS; 395 } 396 397 /** 398 @brief Core logic for assembling composite operator diagonal 399 400 @param[in] op CeedOperator to assemble point block diagonal 401 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 402 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 403 @param[out] assembled CeedVector to store assembled diagonal 404 405 @return An error code: 0 - success, otherwise - failure 406 407 @ref Developer 408 **/ 409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 410 CeedVector assembled) { 411 CeedInt num_sub; 412 CeedOperator *suboperators; 413 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 414 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 415 for (CeedInt i = 0; i < num_sub; i++) { 416 if (is_pointblock) { 417 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 418 } else { 419 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 420 } 421 } 422 return CEED_ERROR_SUCCESS; 423 } 424 425 /** 426 @brief Build nonzero pattern for non-composite operator 427 428 Users should generally use CeedOperatorLinearAssembleSymbolic() 429 430 @param[in] op CeedOperator to assemble nonzero pattern 431 @param[in] offset Offset for number of entries 432 @param[out] rows Row number for each entry 433 @param[out] cols Column number for each entry 434 435 @return An error code: 0 - success, otherwise - failure 436 437 @ref Developer 438 **/ 439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 440 Ceed ceed; 441 bool is_composite; 442 CeedCall(CeedOperatorGetCeed(op, &ceed)); 443 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 444 445 if (is_composite) { 446 // LCOV_EXCL_START 447 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 448 // LCOV_EXCL_STOP 449 } 450 451 CeedSize num_nodes; 452 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 453 CeedElemRestriction rstr_in; 454 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 455 CeedInt num_elem, elem_size, num_comp; 456 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 457 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 458 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 459 CeedInt layout_er[3]; 460 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 461 462 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 463 464 // Determine elem_dof relation 465 CeedVector index_vec; 466 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 467 CeedScalar *array; 468 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 469 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 470 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 471 CeedVector elem_dof; 472 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 473 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 474 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 475 const CeedScalar *elem_dof_a; 476 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 477 CeedCall(CeedVectorDestroy(&index_vec)); 478 479 // Determine i, j locations for element matrices 480 CeedInt count = 0; 481 for (CeedInt e = 0; e < num_elem; e++) { 482 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 483 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 484 for (CeedInt i = 0; i < elem_size; i++) { 485 for (CeedInt j = 0; j < elem_size; j++) { 486 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 487 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 488 489 const CeedInt row = elem_dof_a[elem_dof_index_row]; 490 const CeedInt col = elem_dof_a[elem_dof_index_col]; 491 492 rows[offset + count] = row; 493 cols[offset + count] = col; 494 count++; 495 } 496 } 497 } 498 } 499 } 500 if (count != local_num_entries) { 501 // LCOV_EXCL_START 502 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 503 // LCOV_EXCL_STOP 504 } 505 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 506 CeedCall(CeedVectorDestroy(&elem_dof)); 507 508 return CEED_ERROR_SUCCESS; 509 } 510 511 /** 512 @brief Assemble nonzero entries for non-composite operator 513 514 Users should generally use CeedOperatorLinearAssemble() 515 516 @param[in] op CeedOperator to assemble 517 @param[in] offset Offset for number of entries 518 @param[out] values Values to assemble into matrix 519 520 @return An error code: 0 - success, otherwise - failure 521 522 @ref Developer 523 **/ 524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 525 Ceed ceed; 526 bool is_composite; 527 CeedCall(CeedOperatorGetCeed(op, &ceed)); 528 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 529 530 if (is_composite) { 531 // LCOV_EXCL_START 532 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 533 // LCOV_EXCL_STOP 534 } 535 536 // Early exit for empty operator 537 { 538 CeedInt num_elem = 0; 539 540 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 541 if (num_elem == 0) return CEED_ERROR_SUCCESS; 542 } 543 544 if (op->LinearAssembleSingle) { 545 // Backend version 546 CeedCall(op->LinearAssembleSingle(op, offset, values)); 547 return CEED_ERROR_SUCCESS; 548 } else { 549 // Operator fallback 550 CeedOperator op_fallback; 551 552 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 553 if (op_fallback) { 554 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 555 return CEED_ERROR_SUCCESS; 556 } 557 } 558 559 // Assemble QFunction 560 CeedQFunction qf; 561 CeedCall(CeedOperatorGetQFunction(op, &qf)); 562 CeedVector assembled_qf; 563 CeedElemRestriction rstr_q; 564 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 565 CeedSize qf_length; 566 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 567 568 CeedInt num_input_fields, num_output_fields; 569 CeedOperatorField *input_fields; 570 CeedOperatorField *output_fields; 571 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 572 573 // Get assembly data 574 CeedOperatorAssemblyData data; 575 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 576 const CeedEvalMode **eval_modes_in, **eval_modes_out; 577 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 578 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 579 &eval_modes_out, NULL, NULL)); 580 CeedBasis *bases; 581 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 582 CeedBasis basis_in = bases[0]; 583 584 if (num_active_bases > 1) { 585 // LCOV_EXCL_START 586 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 587 // LCOV_EXCL_STOP 588 } 589 if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) { 590 // LCOV_EXCL_START 591 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 592 // LCOV_EXCL_STOP 593 } 594 595 CeedElemRestriction active_rstr; 596 CeedInt num_elem, elem_size, num_qpts, num_comp; 597 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 598 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 599 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 600 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 601 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 602 603 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 604 605 // loop over elements and put in data structure 606 const CeedScalar *interp_in, *grad_in; 607 CeedCall(CeedBasisGetInterp(basis_in, &interp_in)); 608 CeedCall(CeedBasisGetGrad(basis_in, &grad_in)); 609 610 const CeedScalar *assembled_qf_array; 611 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 612 613 CeedInt layout_qf[3]; 614 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 615 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 616 617 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 618 const CeedScalar **B_mats_in, **B_mats_out; 619 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 620 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 621 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 622 CeedScalar elem_mat[elem_size * elem_size]; 623 CeedInt count = 0; 624 CeedScalar *vals; 625 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 626 for (CeedInt e = 0; e < num_elem; e++) { 627 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 628 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 629 // Compute B^T*D 630 for (CeedInt n = 0; n < elem_size; n++) { 631 for (CeedInt q = 0; q < num_qpts; q++) { 632 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 633 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 634 CeedScalar sum = 0.0; 635 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 636 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 637 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 638 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 639 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 640 } 641 BTD_mat[btd_index] = sum; 642 } 643 } 644 } 645 // form element matrix itself (for each block component) 646 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 647 648 // put element matrix in coordinate data structure 649 for (CeedInt i = 0; i < elem_size; i++) { 650 for (CeedInt j = 0; j < elem_size; j++) { 651 vals[offset + count] = elem_mat[i * elem_size + j]; 652 count++; 653 } 654 } 655 } 656 } 657 } 658 if (count != local_num_entries) { 659 // LCOV_EXCL_START 660 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 661 // LCOV_EXCL_STOP 662 } 663 CeedCall(CeedVectorRestoreArray(values, &vals)); 664 665 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 666 CeedCall(CeedVectorDestroy(&assembled_qf)); 667 668 return CEED_ERROR_SUCCESS; 669 } 670 671 /** 672 @brief Count number of entries for assembled CeedOperator 673 674 @param[in] op CeedOperator to assemble 675 @param[out] num_entries Number of entries in assembled representation 676 677 @return An error code: 0 - success, otherwise - failure 678 679 @ref Utility 680 **/ 681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 682 bool is_composite; 683 CeedElemRestriction rstr; 684 CeedInt num_elem, elem_size, num_comp; 685 686 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 687 if (is_composite) { 688 // LCOV_EXCL_START 689 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 690 // LCOV_EXCL_STOP 691 } 692 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 693 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 694 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 695 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 696 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 697 698 return CEED_ERROR_SUCCESS; 699 } 700 701 /** 702 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 703 704 @param[in] op_fine Fine grid operator 705 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 706 @param[in] rstr_coarse Coarse grid restriction 707 @param[in] basis_coarse Coarse grid active vector basis 708 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 709 @param[out] op_coarse Coarse grid operator 710 @param[out] op_prolong Coarse to fine operator, or NULL 711 @param[out] op_restrict Fine to coarse operator, or NULL 712 713 @return An error code: 0 - success, otherwise - failure 714 715 @ref Developer 716 **/ 717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 718 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 719 Ceed ceed; 720 CeedVector mult_vec = NULL; 721 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 722 723 // Check for composite operator 724 bool is_composite; 725 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 726 if (is_composite) { 727 // LCOV_EXCL_START 728 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 729 // LCOV_EXCL_STOP 730 } 731 732 // Coarse Grid 733 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 734 CeedElemRestriction rstr_fine = NULL; 735 // -- Clone input fields 736 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 737 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 738 rstr_fine = op_fine->input_fields[i]->elem_rstr; 739 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 740 } else { 741 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 742 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 743 } 744 } 745 // -- Clone output fields 746 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 747 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 748 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 749 } else { 750 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 751 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 752 } 753 } 754 // -- Clone QFunctionAssemblyData 755 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 756 757 // Multiplicity vector 758 if (op_restrict || op_prolong) { 759 CeedVector mult_e_vec; 760 761 if (!p_mult_fine) { 762 // LCOV_EXCL_START 763 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 764 // LCOV_EXCL_STOP 765 } 766 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 767 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 768 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 769 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 770 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 771 CeedCall(CeedVectorDestroy(&mult_e_vec)); 772 CeedCall(CeedVectorReciprocal(mult_vec)); 773 } 774 775 // Clone name 776 bool has_name = op_fine->name; 777 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 778 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 779 780 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 781 if ((op_restrict || op_prolong) && !basis_c_to_f) { 782 // LCOV_EXCL_START 783 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis"); 784 // LCOV_EXCL_STOP 785 } 786 787 // Restriction/Prolongation Operators 788 CeedInt num_comp; 789 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 790 791 // Restriction 792 if (op_restrict) { 793 CeedInt *num_comp_r_data; 794 CeedQFunction qf_restrict; 795 CeedQFunctionContext ctx_r; 796 797 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 798 CeedCall(CeedCalloc(1, &num_comp_r_data)); 799 num_comp_r_data[0] = num_comp; 800 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 801 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 802 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 803 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 804 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 805 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 806 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 807 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 808 809 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 810 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 811 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 812 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 813 814 // Set name 815 char *restriction_name; 816 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 817 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 818 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 819 CeedCall(CeedFree(&restriction_name)); 820 821 // Check 822 CeedCall(CeedOperatorCheckReady(*op_restrict)); 823 824 // Cleanup 825 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 826 } 827 828 // Prolongation 829 if (op_prolong) { 830 CeedInt *num_comp_p_data; 831 CeedQFunction qf_prolong; 832 CeedQFunctionContext ctx_p; 833 834 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 835 CeedCall(CeedCalloc(1, &num_comp_p_data)); 836 num_comp_p_data[0] = num_comp; 837 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 838 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 839 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 840 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 841 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 842 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 843 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 844 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 845 846 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 847 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 848 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 849 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 850 851 // Set name 852 char *prolongation_name; 853 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 854 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 855 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 856 CeedCall(CeedFree(&prolongation_name)); 857 858 // Check 859 CeedCall(CeedOperatorCheckReady(*op_prolong)); 860 861 // Cleanup 862 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 863 } 864 865 // Check 866 CeedCall(CeedOperatorCheckReady(*op_coarse)); 867 868 // Cleanup 869 CeedCall(CeedVectorDestroy(&mult_vec)); 870 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 871 872 return CEED_ERROR_SUCCESS; 873 } 874 875 /** 876 @brief Build 1D mass matrix and Laplacian with perturbation 877 878 @param[in] interp_1d Interpolation matrix in one dimension 879 @param[in] grad_1d Gradient matrix in one dimension 880 @param[in] q_weight_1d Quadrature weights in one dimension 881 @param[in] P_1d Number of basis nodes in one dimension 882 @param[in] Q_1d Number of quadrature points in one dimension 883 @param[in] dim Dimension of basis 884 @param[out] mass Assembled mass matrix in one dimension 885 @param[out] laplace Assembled perturbed Laplacian in one dimension 886 887 @return An error code: 0 - success, otherwise - failure 888 889 @ref Developer 890 **/ 891 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, 892 CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 893 for (CeedInt i = 0; i < P_1d; i++) { 894 for (CeedInt j = 0; j < P_1d; j++) { 895 CeedScalar sum = 0.0; 896 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 897 mass[i + j * P_1d] = sum; 898 } 899 } 900 // -- Laplacian 901 for (CeedInt i = 0; i < P_1d; i++) { 902 for (CeedInt j = 0; j < P_1d; j++) { 903 CeedScalar sum = 0.0; 904 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 905 laplace[i + j * P_1d] = sum; 906 } 907 } 908 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 909 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 910 return CEED_ERROR_SUCCESS; 911 } 912 CeedPragmaOptimizeOn; 913 914 /// @} 915 916 /// ---------------------------------------------------------------------------- 917 /// CeedOperator Backend API 918 /// ---------------------------------------------------------------------------- 919 /// @addtogroup CeedOperatorBackend 920 /// @{ 921 922 /** 923 @brief Create object holding CeedQFunction assembly data for CeedOperator 924 925 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 926 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 927 928 @return An error code: 0 - success, otherwise - failure 929 930 @ref Backend 931 **/ 932 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 933 CeedCall(CeedCalloc(1, data)); 934 (*data)->ref_count = 1; 935 (*data)->ceed = ceed; 936 CeedCall(CeedReference(ceed)); 937 938 return CEED_ERROR_SUCCESS; 939 } 940 941 /** 942 @brief Increment the reference counter for a CeedQFunctionAssemblyData 943 944 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 945 946 @return An error code: 0 - success, otherwise - failure 947 948 @ref Backend 949 **/ 950 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 951 data->ref_count++; 952 return CEED_ERROR_SUCCESS; 953 } 954 955 /** 956 @brief Set re-use of CeedQFunctionAssemblyData 957 958 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 959 @param[in] reuse_data Boolean flag indicating data re-use 960 961 @return An error code: 0 - success, otherwise - failure 962 963 @ref Backend 964 **/ 965 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 966 data->reuse_data = reuse_data; 967 data->needs_data_update = true; 968 return CEED_ERROR_SUCCESS; 969 } 970 971 /** 972 @brief Mark QFunctionAssemblyData as stale 973 974 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 975 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 976 977 @return An error code: 0 - success, otherwise - failure 978 979 @ref Backend 980 **/ 981 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 982 data->needs_data_update = needs_data_update; 983 return CEED_ERROR_SUCCESS; 984 } 985 986 /** 987 @brief Determine if QFunctionAssemblyData needs update 988 989 @param[in] data CeedQFunctionAssemblyData to mark as stale 990 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 991 992 @return An error code: 0 - success, otherwise - failure 993 994 @ref Backend 995 **/ 996 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 997 *is_update_needed = !data->reuse_data || data->needs_data_update; 998 return CEED_ERROR_SUCCESS; 999 } 1000 1001 /** 1002 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1003 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1004 1005 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1006 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1007 CeedQFunctionAssemblyData. 1008 1009 @param[in] data CeedQFunctionAssemblyData to copy reference to 1010 @param[in,out] data_copy Variable to store copied reference 1011 1012 @return An error code: 0 - success, otherwise - failure 1013 1014 @ref Backend 1015 **/ 1016 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1017 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1018 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1019 *data_copy = data; 1020 return CEED_ERROR_SUCCESS; 1021 } 1022 1023 /** 1024 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1025 1026 @param[in] data CeedQFunctionAssemblyData to retrieve status 1027 @param[out] is_setup Boolean flag for setup status 1028 1029 @return An error code: 0 - success, otherwise - failure 1030 1031 @ref Backend 1032 **/ 1033 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1034 *is_setup = data->is_setup; 1035 return CEED_ERROR_SUCCESS; 1036 } 1037 1038 /** 1039 @brief Set internal objects for CeedQFunctionAssemblyData 1040 1041 @param[in,out] data CeedQFunctionAssemblyData to set objects 1042 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1043 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1044 1045 @return An error code: 0 - success, otherwise - failure 1046 1047 @ref Backend 1048 **/ 1049 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1050 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1051 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1052 1053 data->is_setup = true; 1054 return CEED_ERROR_SUCCESS; 1055 } 1056 1057 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1058 if (!data->is_setup) { 1059 // LCOV_EXCL_START 1060 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1061 // LCOV_EXCL_STOP 1062 } 1063 1064 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1065 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1066 1067 return CEED_ERROR_SUCCESS; 1068 } 1069 1070 /** 1071 @brief Destroy CeedQFunctionAssemblyData 1072 1073 @param[in,out] data CeedQFunctionAssemblyData to destroy 1074 1075 @return An error code: 0 - success, otherwise - failure 1076 1077 @ref Backend 1078 **/ 1079 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1080 if (!*data || --(*data)->ref_count > 0) { 1081 *data = NULL; 1082 return CEED_ERROR_SUCCESS; 1083 } 1084 CeedCall(CeedDestroy(&(*data)->ceed)); 1085 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1086 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1087 1088 CeedCall(CeedFree(data)); 1089 return CEED_ERROR_SUCCESS; 1090 } 1091 1092 /** 1093 @brief Get CeedOperatorAssemblyData 1094 1095 @param[in] op CeedOperator to assemble 1096 @param[out] data CeedQFunctionAssemblyData 1097 1098 @return An error code: 0 - success, otherwise - failure 1099 1100 @ref Backend 1101 **/ 1102 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1103 if (!op->op_assembled) { 1104 CeedOperatorAssemblyData data; 1105 1106 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1107 op->op_assembled = data; 1108 } 1109 *data = op->op_assembled; 1110 1111 return CEED_ERROR_SUCCESS; 1112 } 1113 1114 /** 1115 @brief Create object holding CeedOperator assembly data. 1116 1117 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1118 An array with references to the corresponding active CeedElemRestrictions is also stored. 1119 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1120 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1121 CeedEvalMode. 1122 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1123 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1124 1125 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1126 @param[in] op CeedOperator to be assembled 1127 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1128 1129 @return An error code: 0 - success, otherwise - failure 1130 1131 @ref Backend 1132 **/ 1133 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1134 CeedInt num_active_bases = 0; 1135 1136 // Allocate 1137 CeedCall(CeedCalloc(1, data)); 1138 (*data)->ceed = ceed; 1139 CeedCall(CeedReference(ceed)); 1140 1141 // Build OperatorAssembly data 1142 CeedQFunction qf; 1143 CeedQFunctionField *qf_fields; 1144 CeedOperatorField *op_fields; 1145 CeedInt num_input_fields; 1146 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1147 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1148 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1149 1150 // Determine active input basis 1151 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1152 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1153 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1154 1155 for (CeedInt i = 0; i < num_input_fields; i++) { 1156 CeedVector vec; 1157 1158 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1159 if (vec == CEED_VECTOR_ACTIVE) { 1160 CeedInt index = -1, dim = 1, num_components; 1161 CeedBasis basis_in = NULL; 1162 CeedEvalMode eval_mode; 1163 1164 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1165 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1166 CeedCall(CeedBasisGetNumComponents(basis_in, &num_components)); 1167 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1168 for (CeedInt i = 0; i < num_active_bases; i++) { 1169 if ((*data)->active_bases[i] == basis_in) index = i; 1170 } 1171 if (index == -1) { 1172 CeedElemRestriction elem_rstr_in; 1173 1174 index = num_active_bases; 1175 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1176 (*data)->active_bases[num_active_bases] = NULL; 1177 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1178 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1179 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1180 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1181 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1182 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1183 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1184 num_eval_modes_in[index] = 0; 1185 num_eval_modes_out[index] = 0; 1186 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1187 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1188 eval_modes_in[index] = NULL; 1189 eval_modes_out[index] = NULL; 1190 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1191 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1192 eval_mode_offsets_in[index] = NULL; 1193 eval_mode_offsets_out[index] = NULL; 1194 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1195 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1196 (*data)->assembled_bases_in[index] = NULL; 1197 (*data)->assembled_bases_out[index] = NULL; 1198 num_active_bases++; 1199 } 1200 switch (eval_mode) { 1201 case CEED_EVAL_NONE: 1202 case CEED_EVAL_INTERP: 1203 CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index])); 1204 CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index])); 1205 eval_modes_in[index][num_eval_modes_in[index]] = eval_mode; 1206 eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset; 1207 offset += num_components; 1208 num_eval_modes_in[index] += 1; 1209 break; 1210 case CEED_EVAL_GRAD: 1211 CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index])); 1212 CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index])); 1213 for (CeedInt d = 0; d < dim; d++) { 1214 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1215 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1216 offset += num_components; 1217 } 1218 num_eval_modes_in[index] += dim; 1219 break; 1220 case CEED_EVAL_WEIGHT: 1221 case CEED_EVAL_DIV: 1222 case CEED_EVAL_CURL: 1223 break; // Caught by QF Assembly 1224 } 1225 } 1226 } 1227 (*data)->num_eval_modes_in = num_eval_modes_in; 1228 (*data)->eval_modes_in = eval_modes_in; 1229 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1230 1231 // Determine active output basis 1232 CeedInt num_output_fields; 1233 1234 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1235 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1236 offset = 0; 1237 for (CeedInt i = 0; i < num_output_fields; i++) { 1238 CeedVector vec; 1239 1240 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1241 if (vec == CEED_VECTOR_ACTIVE) { 1242 CeedInt index = -1, dim = 1, num_components; 1243 CeedBasis basis_out = NULL; 1244 CeedEvalMode eval_mode; 1245 1246 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1247 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1248 CeedCall(CeedBasisGetNumComponents(basis_out, &num_components)); 1249 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1250 for (CeedInt i = 0; i < num_active_bases; i++) { 1251 if ((*data)->active_bases[i] == basis_out) index = i; 1252 } 1253 if (index == -1) { 1254 CeedElemRestriction elem_rstr_out; 1255 1256 index = num_active_bases; 1257 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1258 (*data)->active_bases[num_active_bases] = NULL; 1259 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1260 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1261 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1262 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1263 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1264 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1265 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1266 num_eval_modes_in[index] = 0; 1267 num_eval_modes_out[index] = 0; 1268 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1269 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1270 eval_modes_in[index] = NULL; 1271 eval_modes_out[index] = NULL; 1272 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1273 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1274 eval_mode_offsets_in[index] = NULL; 1275 eval_mode_offsets_out[index] = NULL; 1276 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1277 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1278 (*data)->assembled_bases_in[index] = NULL; 1279 (*data)->assembled_bases_out[index] = NULL; 1280 num_active_bases++; 1281 } 1282 switch (eval_mode) { 1283 case CEED_EVAL_NONE: 1284 case CEED_EVAL_INTERP: 1285 CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index])); 1286 CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index])); 1287 eval_modes_out[index][num_eval_modes_out[index]] = eval_mode; 1288 eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset; 1289 offset += num_components; 1290 num_eval_modes_out[index] += 1; 1291 break; 1292 case CEED_EVAL_GRAD: 1293 CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index])); 1294 CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index])); 1295 for (CeedInt d = 0; d < dim; d++) { 1296 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1297 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1298 offset += num_components; 1299 } 1300 num_eval_modes_out[index] += dim; 1301 break; 1302 case CEED_EVAL_WEIGHT: 1303 case CEED_EVAL_DIV: 1304 case CEED_EVAL_CURL: 1305 break; // Caught by QF Assembly 1306 } 1307 } 1308 } 1309 (*data)->num_output_components = offset; 1310 (*data)->num_eval_modes_out = num_eval_modes_out; 1311 (*data)->eval_modes_out = eval_modes_out; 1312 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1313 (*data)->num_active_bases = num_active_bases; 1314 1315 return CEED_ERROR_SUCCESS; 1316 } 1317 1318 /** 1319 @brief Get CeedOperator CeedEvalModes for assembly. 1320 1321 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1322 1323 @param[in] data CeedOperatorAssemblyData 1324 @param[out] num_active_bases Total number of active bases 1325 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1326 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1327 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1328 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1329 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1330 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1331 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1332 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1333 including contributions of all active bases 1334 1335 @return An error code: 0 - success, otherwise - failure 1336 1337 1338 @ref Backend 1339 **/ 1340 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1341 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1342 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1343 if (num_active_bases) *num_active_bases = data->num_active_bases; 1344 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1345 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1346 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1347 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1348 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1349 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1350 if (num_output_components) *num_output_components = data->num_output_components; 1351 1352 return CEED_ERROR_SUCCESS; 1353 } 1354 1355 /** 1356 @brief Get CeedOperator CeedBasis data for assembly. 1357 1358 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1359 1360 @param[in] data CeedOperatorAssemblyData 1361 @param[out] num_active_bases Number of active bases, or NULL 1362 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1363 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1364 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1365 1366 @return An error code: 0 - success, otherwise - failure 1367 1368 @ref Backend 1369 **/ 1370 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1371 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1372 // Assemble B_in, B_out if needed 1373 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1374 CeedInt num_qpts; 1375 1376 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1377 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1378 CeedInt elem_size; 1379 CeedScalar *B_in = NULL, *identity = NULL; 1380 const CeedScalar *interp_in, *grad_in; 1381 bool has_eval_none = false; 1382 1383 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size)); 1384 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in)); 1385 1386 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1387 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1388 } 1389 if (has_eval_none) { 1390 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1391 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1392 identity[i * elem_size + i] = 1.0; 1393 } 1394 } 1395 CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in)); 1396 CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in)); 1397 1398 for (CeedInt q = 0; q < num_qpts; q++) { 1399 for (CeedInt n = 0; n < elem_size; n++) { 1400 CeedInt d_in = -1; 1401 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1402 const CeedInt qq = data->num_eval_modes_in[b] * q; 1403 const CeedScalar *B = NULL; 1404 1405 if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++; 1406 CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B); 1407 B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n]; 1408 } 1409 } 1410 } 1411 if (identity) CeedCall(CeedFree(identity)); 1412 data->assembled_bases_in[b] = B_in; 1413 } 1414 } 1415 1416 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1417 CeedInt num_qpts; 1418 1419 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1420 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1421 CeedInt elem_size; 1422 const CeedScalar *interp_out, *grad_out; 1423 bool has_eval_none = false; 1424 CeedScalar *B_out = NULL, *identity = NULL; 1425 1426 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size)); 1427 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out)); 1428 1429 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1430 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1431 } 1432 if (has_eval_none) { 1433 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1434 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1435 identity[i * elem_size + i] = 1.0; 1436 } 1437 } 1438 CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out)); 1439 CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out)); 1440 1441 for (CeedInt q = 0; q < num_qpts; q++) { 1442 for (CeedInt n = 0; n < elem_size; n++) { 1443 CeedInt d_out = -1; 1444 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1445 const CeedInt qq = data->num_eval_modes_out[b] * q; 1446 const CeedScalar *B = NULL; 1447 1448 if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++; 1449 CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B); 1450 B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n]; 1451 } 1452 } 1453 } 1454 if (identity) CeedCall(CeedFree(identity)); 1455 data->assembled_bases_out[b] = B_out; 1456 } 1457 } 1458 1459 // Pass out assembled data 1460 if (active_bases) *active_bases = data->active_bases; 1461 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1462 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1463 1464 return CEED_ERROR_SUCCESS; 1465 } 1466 1467 /** 1468 @brief Get CeedOperator CeedBasis data for assembly. 1469 1470 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1471 1472 @param[in] data CeedOperatorAssemblyData 1473 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1474 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1475 1476 @return An error code: 0 - success, otherwise - failure 1477 1478 @ref Backend 1479 **/ 1480 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1481 CeedElemRestriction **active_elem_rstrs) { 1482 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1483 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1484 1485 return CEED_ERROR_SUCCESS; 1486 } 1487 1488 /** 1489 @brief Destroy CeedOperatorAssemblyData 1490 1491 @param[in,out] data CeedOperatorAssemblyData to destroy 1492 1493 @return An error code: 0 - success, otherwise - failure 1494 1495 @ref Backend 1496 **/ 1497 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1498 if (!*data) { 1499 *data = NULL; 1500 return CEED_ERROR_SUCCESS; 1501 } 1502 CeedCall(CeedDestroy(&(*data)->ceed)); 1503 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1504 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1505 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1506 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1507 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1508 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1509 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1510 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1511 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1512 } 1513 CeedCall(CeedFree(&(*data)->active_bases)); 1514 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1515 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1516 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1517 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1518 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1519 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1520 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1521 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1522 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1523 1524 CeedCall(CeedFree(data)); 1525 return CEED_ERROR_SUCCESS; 1526 } 1527 1528 /// @} 1529 1530 /// ---------------------------------------------------------------------------- 1531 /// CeedOperator Public API 1532 /// ---------------------------------------------------------------------------- 1533 /// @addtogroup CeedOperatorUser 1534 /// @{ 1535 1536 /** 1537 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1538 1539 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1540 The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices 1541 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the 1542 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, 1543 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] 1544 and producing the output [dv_0, dv_1, v]. 1545 1546 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1547 1548 @param[in] op CeedOperator to assemble CeedQFunction 1549 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1550 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1551 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1552 1553 @return An error code: 0 - success, otherwise - failure 1554 1555 @ref User 1556 **/ 1557 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1558 CeedCall(CeedOperatorCheckReady(op)); 1559 1560 if (op->LinearAssembleQFunction) { 1561 // Backend version 1562 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1563 } else { 1564 // Operator fallback 1565 CeedOperator op_fallback; 1566 1567 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1568 if (op_fallback) { 1569 CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1570 } else { 1571 // LCOV_EXCL_START 1572 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1573 // LCOV_EXCL_STOP 1574 } 1575 } 1576 return CEED_ERROR_SUCCESS; 1577 } 1578 1579 /** 1580 @brief Assemble CeedQFunction and store result internally. 1581 Return copied references of stored data to the caller. 1582 Caller is responsible for ownership and destruction of the copied references. 1583 See also @ref CeedOperatorLinearAssembleQFunction 1584 1585 @param[in] op CeedOperator to assemble CeedQFunction 1586 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1587 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1588 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1589 1590 @return An error code: 0 - success, otherwise - failure 1591 1592 @ref User 1593 **/ 1594 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1595 CeedCall(CeedOperatorCheckReady(op)); 1596 1597 if (op->LinearAssembleQFunctionUpdate) { 1598 // Backend version 1599 bool qf_assembled_is_setup; 1600 CeedVector assembled_vec = NULL; 1601 CeedElemRestriction assembled_rstr = NULL; 1602 1603 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1604 if (qf_assembled_is_setup) { 1605 bool update_needed; 1606 1607 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1608 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1609 if (update_needed) { 1610 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1611 } 1612 } else { 1613 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1614 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1615 } 1616 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1617 1618 // Copy reference from internally held copy 1619 *assembled = NULL; 1620 *rstr = NULL; 1621 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1622 CeedCall(CeedVectorDestroy(&assembled_vec)); 1623 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1624 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1625 } else { 1626 // Operator fallback 1627 CeedOperator op_fallback; 1628 1629 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1630 if (op_fallback) { 1631 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1632 } else { 1633 // LCOV_EXCL_START 1634 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1635 // LCOV_EXCL_STOP 1636 } 1637 } 1638 1639 return CEED_ERROR_SUCCESS; 1640 } 1641 1642 /** 1643 @brief Assemble the diagonal of a square linear CeedOperator 1644 1645 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1646 1647 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1648 1649 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1650 1651 @param[in] op CeedOperator to assemble CeedQFunction 1652 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1653 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1654 1655 @return An error code: 0 - success, otherwise - failure 1656 1657 @ref User 1658 **/ 1659 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1660 bool is_composite; 1661 CeedCall(CeedOperatorCheckReady(op)); 1662 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1663 1664 CeedSize input_size = 0, output_size = 0; 1665 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1666 if (input_size != output_size) { 1667 // LCOV_EXCL_START 1668 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1669 // LCOV_EXCL_STOP 1670 } 1671 1672 // Early exit for empty operator 1673 if (!is_composite) { 1674 CeedInt num_elem = 0; 1675 1676 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1677 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1678 } 1679 1680 if (op->LinearAssembleDiagonal) { 1681 // Backend version 1682 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1683 return CEED_ERROR_SUCCESS; 1684 } else if (op->LinearAssembleAddDiagonal) { 1685 // Backend version with zeroing first 1686 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1687 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1688 return CEED_ERROR_SUCCESS; 1689 } else { 1690 // Operator fallback 1691 CeedOperator op_fallback; 1692 1693 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1694 if (op_fallback) { 1695 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1696 return CEED_ERROR_SUCCESS; 1697 } 1698 } 1699 // Default interface implementation 1700 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1701 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1702 1703 return CEED_ERROR_SUCCESS; 1704 } 1705 1706 /** 1707 @brief Assemble the diagonal of a square linear CeedOperator 1708 1709 This sums into a CeedVector the diagonal of a linear CeedOperator. 1710 1711 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1712 1713 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1714 1715 @param[in] op CeedOperator to assemble CeedQFunction 1716 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1717 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1718 1719 @return An error code: 0 - success, otherwise - failure 1720 1721 @ref User 1722 **/ 1723 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1724 bool is_composite; 1725 CeedCall(CeedOperatorCheckReady(op)); 1726 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1727 1728 CeedSize input_size = 0, output_size = 0; 1729 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1730 if (input_size != output_size) { 1731 // LCOV_EXCL_START 1732 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1733 // LCOV_EXCL_STOP 1734 } 1735 1736 // Early exit for empty operator 1737 if (!is_composite) { 1738 CeedInt num_elem = 0; 1739 1740 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1741 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1742 } 1743 1744 if (op->LinearAssembleAddDiagonal) { 1745 // Backend version 1746 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1747 return CEED_ERROR_SUCCESS; 1748 } else { 1749 // Operator fallback 1750 CeedOperator op_fallback; 1751 1752 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1753 if (op_fallback) { 1754 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1755 return CEED_ERROR_SUCCESS; 1756 } 1757 } 1758 // Default interface implementation 1759 if (is_composite) { 1760 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1761 } else { 1762 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1763 } 1764 1765 return CEED_ERROR_SUCCESS; 1766 } 1767 1768 /** 1769 @brief Assemble the point block diagonal of a square linear CeedOperator 1770 1771 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1772 1773 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1774 1775 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1776 1777 @param[in] op CeedOperator to assemble CeedQFunction 1778 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1779 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1780 component in]. 1781 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1782 1783 @return An error code: 0 - success, otherwise - failure 1784 1785 @ref User 1786 **/ 1787 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1788 bool is_composite; 1789 CeedCall(CeedOperatorCheckReady(op)); 1790 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1791 1792 CeedSize input_size = 0, output_size = 0; 1793 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1794 if (input_size != output_size) { 1795 // LCOV_EXCL_START 1796 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1797 // LCOV_EXCL_STOP 1798 } 1799 1800 // Early exit for empty operator 1801 if (!is_composite) { 1802 CeedInt num_elem = 0; 1803 1804 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1805 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1806 } 1807 1808 if (op->LinearAssemblePointBlockDiagonal) { 1809 // Backend version 1810 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1811 return CEED_ERROR_SUCCESS; 1812 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1813 // Backend version with zeroing first 1814 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1815 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1816 return CEED_ERROR_SUCCESS; 1817 } else { 1818 // Operator fallback 1819 CeedOperator op_fallback; 1820 1821 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1822 if (op_fallback) { 1823 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1824 return CEED_ERROR_SUCCESS; 1825 } 1826 } 1827 // Default interface implementation 1828 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1829 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1830 1831 return CEED_ERROR_SUCCESS; 1832 } 1833 1834 /** 1835 @brief Assemble the point block diagonal of a square linear CeedOperator 1836 1837 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1838 1839 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1840 1841 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1842 1843 @param[in] op CeedOperator to assemble CeedQFunction 1844 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1845 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1846 component in]. 1847 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1848 1849 @return An error code: 0 - success, otherwise - failure 1850 1851 @ref User 1852 **/ 1853 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1854 bool is_composite; 1855 CeedCall(CeedOperatorCheckReady(op)); 1856 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1857 1858 CeedSize input_size = 0, output_size = 0; 1859 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1860 if (input_size != output_size) { 1861 // LCOV_EXCL_START 1862 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1863 // LCOV_EXCL_STOP 1864 } 1865 1866 // Early exit for empty operator 1867 if (!is_composite) { 1868 CeedInt num_elem = 0; 1869 1870 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1871 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1872 } 1873 1874 if (op->LinearAssembleAddPointBlockDiagonal) { 1875 // Backend version 1876 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1877 return CEED_ERROR_SUCCESS; 1878 } else { 1879 // Operator fallback 1880 CeedOperator op_fallback; 1881 1882 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1883 if (op_fallback) { 1884 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1885 return CEED_ERROR_SUCCESS; 1886 } 1887 } 1888 // Default interface implementation 1889 if (is_composite) { 1890 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1891 } else { 1892 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1893 } 1894 1895 return CEED_ERROR_SUCCESS; 1896 } 1897 1898 /** 1899 @brief Fully assemble the nonzero pattern of a linear operator. 1900 1901 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1902 1903 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1904 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j) 1905 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1906 1907 This will generally be slow unless your operator is low-order. 1908 1909 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1910 1911 @param[in] op CeedOperator to assemble 1912 @param[out] num_entries Number of entries in coordinate nonzero pattern 1913 @param[out] rows Row number for each entry 1914 @param[out] cols Column number for each entry 1915 1916 @ref User 1917 **/ 1918 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1919 CeedInt num_suboperators, single_entries; 1920 CeedOperator *sub_operators; 1921 bool is_composite; 1922 CeedCall(CeedOperatorCheckReady(op)); 1923 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1924 1925 if (op->LinearAssembleSymbolic) { 1926 // Backend version 1927 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1928 return CEED_ERROR_SUCCESS; 1929 } else { 1930 // Operator fallback 1931 CeedOperator op_fallback; 1932 1933 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1934 if (op_fallback) { 1935 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1936 return CEED_ERROR_SUCCESS; 1937 } 1938 } 1939 1940 // Default interface implementation 1941 1942 // count entries and allocate rows, cols arrays 1943 *num_entries = 0; 1944 if (is_composite) { 1945 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1946 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1947 for (CeedInt k = 0; k < num_suboperators; ++k) { 1948 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1949 *num_entries += single_entries; 1950 } 1951 } else { 1952 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1953 *num_entries += single_entries; 1954 } 1955 CeedCall(CeedCalloc(*num_entries, rows)); 1956 CeedCall(CeedCalloc(*num_entries, cols)); 1957 1958 // assemble nonzero locations 1959 CeedInt offset = 0; 1960 if (is_composite) { 1961 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1962 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1963 for (CeedInt k = 0; k < num_suboperators; ++k) { 1964 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1965 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1966 offset += single_entries; 1967 } 1968 } else { 1969 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1970 } 1971 1972 return CEED_ERROR_SUCCESS; 1973 } 1974 1975 /** 1976 @brief Fully assemble the nonzero entries of a linear operator. 1977 1978 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1979 1980 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1981 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added, 1982 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1983 1984 This will generally be slow unless your operator is low-order. 1985 1986 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1987 1988 @param[in] op CeedOperator to assemble 1989 @param[out] values Values to assemble into matrix 1990 1991 @ref User 1992 **/ 1993 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1994 CeedInt num_suboperators, single_entries = 0; 1995 CeedOperator *sub_operators; 1996 bool is_composite; 1997 CeedCall(CeedOperatorCheckReady(op)); 1998 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1999 2000 // Early exit for empty operator 2001 if (!is_composite) { 2002 CeedInt num_elem = 0; 2003 2004 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2005 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2006 } 2007 2008 if (op->LinearAssemble) { 2009 // Backend version 2010 CeedCall(op->LinearAssemble(op, values)); 2011 return CEED_ERROR_SUCCESS; 2012 } else { 2013 // Operator fallback 2014 CeedOperator op_fallback; 2015 2016 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2017 if (op_fallback) { 2018 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2019 return CEED_ERROR_SUCCESS; 2020 } 2021 } 2022 2023 // Default interface implementation 2024 CeedInt offset = 0; 2025 CeedCall(CeedVectorSetValue(values, 0.0)); 2026 if (is_composite) { 2027 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2028 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2029 for (CeedInt k = 0; k < num_suboperators; k++) { 2030 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2031 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2032 offset += single_entries; 2033 } 2034 } else { 2035 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2036 } 2037 2038 return CEED_ERROR_SUCCESS; 2039 } 2040 2041 /** 2042 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2043 2044 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2045 2046 @param[in] op Composite CeedOperator 2047 @param[in] num_skip_indices Number of suboperators to skip 2048 @param[in] skip_indices Array of indices of suboperators to skip 2049 @param[out] mult Vector to store multiplicity (of size l_size) 2050 2051 @return An error code: 0 - success, otherwise - failure 2052 2053 @ref User 2054 **/ 2055 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2056 CeedCall(CeedOperatorCheckReady(op)); 2057 2058 Ceed ceed; 2059 CeedInt num_suboperators; 2060 CeedSize l_vec_len; 2061 CeedScalar *mult_array; 2062 CeedVector ones_l_vec; 2063 CeedElemRestriction elem_rstr; 2064 CeedOperator *sub_operators; 2065 2066 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2067 2068 // Zero mult vector 2069 CeedCall(CeedVectorSetValue(mult, 0.0)); 2070 2071 // Get suboperators 2072 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2073 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2074 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2075 2076 // Work vector 2077 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2078 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2079 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2080 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2081 2082 // Compute multiplicity across suboperators 2083 for (CeedInt i = 0; i < num_suboperators; i++) { 2084 const CeedScalar *sub_mult_array; 2085 CeedVector sub_mult_l_vec, ones_e_vec; 2086 2087 // -- Check for suboperator to skip 2088 for (CeedInt j = 0; j < num_skip_indices; j++) { 2089 if (skip_indices[j] == i) continue; 2090 } 2091 2092 // -- Sub operator multiplicity 2093 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2094 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2095 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2096 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2097 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2098 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2099 // ---- Flag every node present in the current suboperator 2100 for (CeedInt j = 0; j < l_vec_len; j++) { 2101 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2102 } 2103 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2104 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2105 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2106 } 2107 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2108 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2109 2110 return CEED_ERROR_SUCCESS; 2111 } 2112 2113 /** 2114 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2115 grid interpolation 2116 2117 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2118 2119 @param[in] op_fine Fine grid operator 2120 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2121 @param[in] rstr_coarse Coarse grid restriction 2122 @param[in] basis_coarse Coarse grid active vector basis 2123 @param[out] op_coarse Coarse grid operator 2124 @param[out] op_prolong Coarse to fine operator, or NULL 2125 @param[out] op_restrict Fine to coarse operator, or NULL 2126 2127 @return An error code: 0 - success, otherwise - failure 2128 2129 @ref User 2130 **/ 2131 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2132 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2133 CeedCall(CeedOperatorCheckReady(op_fine)); 2134 2135 // Build prolongation matrix, if required 2136 CeedBasis basis_c_to_f = NULL; 2137 if (op_prolong || op_restrict) { 2138 CeedBasis basis_fine; 2139 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2140 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2141 } 2142 2143 // Core code 2144 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2145 2146 return CEED_ERROR_SUCCESS; 2147 } 2148 2149 /** 2150 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2151 2152 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2153 2154 @param[in] op_fine Fine grid operator 2155 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2156 @param[in] rstr_coarse Coarse grid restriction 2157 @param[in] basis_coarse Coarse grid active vector basis 2158 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2159 @param[out] op_coarse Coarse grid operator 2160 @param[out] op_prolong Coarse to fine operator, or NULL 2161 @param[out] op_restrict Fine to coarse operator, or NULL 2162 2163 @return An error code: 0 - success, otherwise - failure 2164 2165 @ref User 2166 **/ 2167 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2168 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2169 CeedOperator *op_restrict) { 2170 CeedCall(CeedOperatorCheckReady(op_fine)); 2171 Ceed ceed; 2172 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2173 2174 // Check for compatible quadrature spaces 2175 CeedBasis basis_fine; 2176 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2177 CeedInt Q_f, Q_c; 2178 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2179 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2180 if (Q_f != Q_c) { 2181 // LCOV_EXCL_START 2182 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2183 // LCOV_EXCL_STOP 2184 } 2185 2186 // Create coarse to fine basis, if required 2187 CeedBasis basis_c_to_f = NULL; 2188 if (op_prolong || op_restrict) { 2189 // Check if interpolation matrix is provided 2190 if (!interp_c_to_f) { 2191 // LCOV_EXCL_START 2192 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2193 // LCOV_EXCL_STOP 2194 } 2195 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2196 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2197 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2198 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2199 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2200 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2201 CeedScalar *q_ref, *q_weight, *grad; 2202 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2203 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2204 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2205 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2206 CeedCall(CeedFree(&q_ref)); 2207 CeedCall(CeedFree(&q_weight)); 2208 CeedCall(CeedFree(&grad)); 2209 } 2210 2211 // Core code 2212 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2213 return CEED_ERROR_SUCCESS; 2214 } 2215 2216 /** 2217 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2218 2219 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2220 2221 @param[in] op_fine Fine grid operator 2222 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2223 @param[in] rstr_coarse Coarse grid restriction 2224 @param[in] basis_coarse Coarse grid active vector basis 2225 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2226 @param[out] op_coarse Coarse grid operator 2227 @param[out] op_prolong Coarse to fine operator, or NULL 2228 @param[out] op_restrict Fine to coarse operator, or NULL 2229 2230 @return An error code: 0 - success, otherwise - failure 2231 2232 @ref User 2233 **/ 2234 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2235 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2236 CeedOperator *op_restrict) { 2237 CeedCall(CeedOperatorCheckReady(op_fine)); 2238 Ceed ceed; 2239 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2240 2241 // Check for compatible quadrature spaces 2242 CeedBasis basis_fine; 2243 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2244 CeedInt Q_f, Q_c; 2245 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2246 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2247 if (Q_f != Q_c) { 2248 // LCOV_EXCL_START 2249 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2250 // LCOV_EXCL_STOP 2251 } 2252 2253 // Coarse to fine basis 2254 CeedBasis basis_c_to_f = NULL; 2255 if (op_prolong || op_restrict) { 2256 // Check if interpolation matrix is provided 2257 if (!interp_c_to_f) { 2258 // LCOV_EXCL_START 2259 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2260 // LCOV_EXCL_STOP 2261 } 2262 CeedElemTopology topo; 2263 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2264 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2265 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2266 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2267 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2268 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2269 CeedScalar *q_ref, *q_weight, *grad; 2270 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2271 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2272 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2273 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2274 CeedCall(CeedFree(&q_ref)); 2275 CeedCall(CeedFree(&q_weight)); 2276 CeedCall(CeedFree(&grad)); 2277 } 2278 2279 // Core code 2280 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2281 return CEED_ERROR_SUCCESS; 2282 } 2283 2284 /** 2285 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2286 2287 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2288 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V. 2289 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T 2290 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear. 2291 2292 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2293 2294 @param[in] op CeedOperator to create element inverses 2295 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2296 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2297 2298 @return An error code: 0 - success, otherwise - failure 2299 2300 @ref User 2301 **/ 2302 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2303 CeedCall(CeedOperatorCheckReady(op)); 2304 2305 if (op->CreateFDMElementInverse) { 2306 // Backend version 2307 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2308 return CEED_ERROR_SUCCESS; 2309 } else { 2310 // Operator fallback 2311 CeedOperator op_fallback; 2312 2313 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2314 if (op_fallback) { 2315 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2316 return CEED_ERROR_SUCCESS; 2317 } 2318 } 2319 2320 // Default interface implementation 2321 Ceed ceed, ceed_parent; 2322 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2323 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2324 ceed_parent = ceed_parent ? ceed_parent : ceed; 2325 CeedQFunction qf; 2326 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2327 2328 // Determine active input basis 2329 bool interp = false, grad = false; 2330 CeedBasis basis = NULL; 2331 CeedElemRestriction rstr = NULL; 2332 CeedOperatorField *op_fields; 2333 CeedQFunctionField *qf_fields; 2334 CeedInt num_input_fields; 2335 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2336 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2337 for (CeedInt i = 0; i < num_input_fields; i++) { 2338 CeedVector vec; 2339 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2340 if (vec == CEED_VECTOR_ACTIVE) { 2341 CeedEvalMode eval_mode; 2342 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2343 interp = interp || eval_mode == CEED_EVAL_INTERP; 2344 grad = grad || eval_mode == CEED_EVAL_GRAD; 2345 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2346 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2347 } 2348 } 2349 if (!basis) { 2350 // LCOV_EXCL_START 2351 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2352 // LCOV_EXCL_STOP 2353 } 2354 CeedSize l_size = 1; 2355 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2356 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2357 CeedCall(CeedBasisGetNumNodes(basis, &elem_size)); 2358 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2359 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2360 CeedCall(CeedBasisGetDimension(basis, &dim)); 2361 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2362 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2363 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2364 2365 // Build and diagonalize 1D Mass and Laplacian 2366 bool tensor_basis; 2367 CeedCall(CeedBasisIsTensor(basis, &tensor_basis)); 2368 if (!tensor_basis) { 2369 // LCOV_EXCL_START 2370 return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2371 // LCOV_EXCL_STOP 2372 } 2373 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2374 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2375 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2376 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2377 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2378 CeedCall(CeedCalloc(P_1d, &lambda)); 2379 // -- Build matrices 2380 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2381 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2382 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2383 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2384 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2385 2386 // -- Diagonalize 2387 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2388 CeedCall(CeedFree(&mass)); 2389 CeedCall(CeedFree(&laplace)); 2390 for (CeedInt i = 0; i < P_1d; i++) { 2391 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2392 } 2393 CeedCall(CeedFree(&x)); 2394 2395 // Assemble QFunction 2396 CeedVector assembled; 2397 CeedElemRestriction rstr_qf; 2398 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2399 CeedInt layout[3]; 2400 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2401 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2402 CeedScalar max_norm = 0; 2403 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2404 2405 // Calculate element averages 2406 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2407 CeedScalar *elem_avg; 2408 const CeedScalar *assembled_array, *q_weight_array; 2409 CeedVector q_weight; 2410 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2411 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2412 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2413 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2414 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2415 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2416 for (CeedInt e = 0; e < num_elem; e++) { 2417 CeedInt count = 0; 2418 for (CeedInt q = 0; q < num_qpts; q++) { 2419 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2420 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2421 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2422 count++; 2423 } 2424 } 2425 } 2426 if (count) { 2427 elem_avg[e] /= count; 2428 } else { 2429 elem_avg[e] = 1.0; 2430 } 2431 } 2432 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2433 CeedCall(CeedVectorDestroy(&assembled)); 2434 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2435 CeedCall(CeedVectorDestroy(&q_weight)); 2436 2437 // Build FDM diagonal 2438 CeedVector q_data; 2439 CeedScalar *q_data_array, *fdm_diagonal; 2440 CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal)); 2441 const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON; 2442 for (CeedInt c = 0; c < num_comp; c++) { 2443 for (CeedInt n = 0; n < elem_size; n++) { 2444 if (interp) fdm_diagonal[c * elem_size + n] = 1.0; 2445 if (grad) { 2446 for (CeedInt d = 0; d < dim; d++) { 2447 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2448 fdm_diagonal[c * elem_size + n] += lambda[i]; 2449 } 2450 } 2451 if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound; 2452 } 2453 } 2454 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data)); 2455 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2456 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2457 for (CeedInt e = 0; e < num_elem; e++) { 2458 for (CeedInt c = 0; c < num_comp; c++) { 2459 for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]); 2460 } 2461 } 2462 CeedCall(CeedFree(&elem_avg)); 2463 CeedCall(CeedFree(&fdm_diagonal)); 2464 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2465 2466 // Setup FDM operator 2467 // -- Basis 2468 CeedBasis fdm_basis; 2469 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2470 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2471 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2472 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2473 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2474 CeedCall(CeedFree(&fdm_interp)); 2475 CeedCall(CeedFree(&grad_dummy)); 2476 CeedCall(CeedFree(&q_ref_dummy)); 2477 CeedCall(CeedFree(&q_weight_dummy)); 2478 CeedCall(CeedFree(&lambda)); 2479 2480 // -- Restriction 2481 CeedElemRestriction rstr_qd_i; 2482 CeedInt strides[3] = {1, elem_size, elem_size * num_comp}; 2483 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i)); 2484 // -- QFunction 2485 CeedQFunction qf_fdm; 2486 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2487 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2488 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2489 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2490 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2491 // -- QFunction context 2492 CeedInt *num_comp_data; 2493 CeedCall(CeedCalloc(1, &num_comp_data)); 2494 num_comp_data[0] = num_comp; 2495 CeedQFunctionContext ctx_fdm; 2496 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2497 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2498 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2499 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2500 // -- Operator 2501 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2502 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2503 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2504 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2505 2506 // Cleanup 2507 CeedCall(CeedVectorDestroy(&q_data)); 2508 CeedCall(CeedBasisDestroy(&fdm_basis)); 2509 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2510 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2511 2512 return CEED_ERROR_SUCCESS; 2513 } 2514 2515 /// @} 2516