1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <assert.h> 9 #include <ceed-impl.h> 10 #include <ceed/backend.h> 11 #include <ceed/ceed.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] eval_mode Current basis evaluation mode 184 @param[in] identity Pointer to identity matrix 185 @param[in] interp Pointer to interpolation matrix 186 @param[in] grad Pointer to gradient matrix 187 @param[out] basis_ptr Basis pointer to set 188 189 @ref Developer 190 **/ 191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad, 192 const CeedScalar **basis_ptr) { 193 switch (eval_mode) { 194 case CEED_EVAL_NONE: 195 *basis_ptr = identity; 196 break; 197 case CEED_EVAL_INTERP: 198 *basis_ptr = interp; 199 break; 200 case CEED_EVAL_GRAD: 201 *basis_ptr = grad; 202 break; 203 case CEED_EVAL_WEIGHT: 204 case CEED_EVAL_DIV: 205 case CEED_EVAL_CURL: 206 break; // Caught by QF Assembly 207 } 208 assert(*basis_ptr != NULL); 209 } 210 211 /** 212 @brief Create point block restriction for active operator field 213 214 @param[in] rstr Original CeedElemRestriction for active field 215 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 216 217 @return An error code: 0 - success, otherwise - failure 218 219 @ref Developer 220 **/ 221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 222 Ceed ceed; 223 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 224 const CeedInt *offsets; 225 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 226 227 // Expand offsets 228 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 229 CeedSize l_size; 230 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 231 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 232 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 233 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 234 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 235 CeedInt shift = num_comp; 236 if (comp_stride != 1) shift *= num_comp; 237 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 238 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 239 pointblock_offsets[i] = offsets[i] * shift; 240 } 241 242 // Create new restriction 243 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 244 pointblock_offsets, pointblock_rstr)); 245 246 // Cleanup 247 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 248 249 return CEED_ERROR_SUCCESS; 250 } 251 252 /** 253 @brief Core logic for assembling operator diagonal or point block diagonal 254 255 @param[in] op CeedOperator to assemble point block diagonal 256 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 257 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 258 @param[out] assembled CeedVector to store assembled diagonal 259 260 @return An error code: 0 - success, otherwise - failure 261 262 @ref Developer 263 **/ 264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 265 Ceed ceed; 266 CeedCall(CeedOperatorGetCeed(op, &ceed)); 267 268 // Assemble QFunction 269 CeedQFunction qf; 270 const CeedScalar *assembled_qf_array; 271 CeedVector assembled_qf; 272 CeedElemRestriction assembled_elem_rstr; 273 CeedInt num_input_fields, num_output_fields; 274 CeedInt layout[3]; 275 276 CeedCall(CeedOperatorGetQFunction(op, &qf)); 277 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 278 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 279 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 280 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 281 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 282 283 // Get assembly data 284 CeedOperatorAssemblyData data; 285 const CeedEvalMode **eval_modes_in, **eval_modes_out; 286 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 287 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 288 CeedBasis *active_bases; 289 CeedElemRestriction *active_elem_rstrs; 290 291 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 292 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 293 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 294 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 295 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 296 297 // Loop over all active bases 298 for (CeedInt b = 0; b < num_active_bases; b++) { 299 // Assemble point block diagonal restriction, if needed 300 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 301 302 if (is_pointblock) { 303 CeedElemRestriction point_block_elem_rstr; 304 305 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 306 diag_elem_rstr = point_block_elem_rstr; 307 } 308 309 // Create diagonal vector 310 CeedVector elem_diag; 311 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array; 315 CeedInt num_elem, num_nodes, num_qpts, num_components; 316 317 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 318 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 319 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 320 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 321 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 322 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 323 324 // Basis matrices 325 const CeedScalar *interp, *grad; 326 CeedScalar *identity = NULL; 327 bool has_eval_none = false; 328 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 329 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 330 } 331 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 332 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 333 } 334 if (has_eval_none) { 335 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 336 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 337 } 338 CeedCall(CeedBasisGetInterp(active_bases[b], &interp)); 339 CeedCall(CeedBasisGetGrad(active_bases[b], &grad)); 340 // Compute the diagonal of B^T D B 341 // Each element 342 for (CeedInt e = 0; e < num_elem; e++) { 343 CeedInt d_out = -1; 344 // Each basis eval mode pair 345 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 346 const CeedScalar *B_t = NULL; 347 if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1; 348 CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t); 349 CeedInt d_in = -1; 350 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 351 const CeedScalar *B = NULL; 352 if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1; 353 CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B); 354 // Each component 355 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 356 // Each qpt/node pair 357 for (CeedInt q = 0; q < num_qpts; q++) { 358 if (is_pointblock) { 359 // Point Block Diagonal 360 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 361 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 362 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 363 for (CeedInt n = 0; n < num_nodes; n++) { 364 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 365 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 366 } 367 } 368 } else { 369 // Diagonal Only 370 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 371 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 372 for (CeedInt n = 0; n < num_nodes; n++) { 373 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 374 } 375 } 376 } 377 } 378 } 379 } 380 } 381 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 382 383 // Assemble local operator diagonal 384 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 385 386 // Cleanup 387 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 388 CeedCall(CeedVectorDestroy(&elem_diag)); 389 CeedCall(CeedFree(&identity)); 390 } 391 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 392 CeedCall(CeedVectorDestroy(&assembled_qf)); 393 394 return CEED_ERROR_SUCCESS; 395 } 396 397 /** 398 @brief Core logic for assembling composite operator diagonal 399 400 @param[in] op CeedOperator to assemble point block diagonal 401 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 402 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 403 @param[out] assembled CeedVector to store assembled diagonal 404 405 @return An error code: 0 - success, otherwise - failure 406 407 @ref Developer 408 **/ 409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 410 CeedVector assembled) { 411 CeedInt num_sub; 412 CeedOperator *suboperators; 413 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 414 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 415 for (CeedInt i = 0; i < num_sub; i++) { 416 if (is_pointblock) { 417 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 418 } else { 419 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 420 } 421 } 422 return CEED_ERROR_SUCCESS; 423 } 424 425 /** 426 @brief Build nonzero pattern for non-composite operator 427 428 Users should generally use CeedOperatorLinearAssembleSymbolic() 429 430 @param[in] op CeedOperator to assemble nonzero pattern 431 @param[in] offset Offset for number of entries 432 @param[out] rows Row number for each entry 433 @param[out] cols Column number for each entry 434 435 @return An error code: 0 - success, otherwise - failure 436 437 @ref Developer 438 **/ 439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 440 Ceed ceed; 441 bool is_composite; 442 CeedCall(CeedOperatorGetCeed(op, &ceed)); 443 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 444 445 if (is_composite) { 446 // LCOV_EXCL_START 447 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 448 // LCOV_EXCL_STOP 449 } 450 451 CeedSize num_nodes; 452 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 453 CeedElemRestriction rstr_in; 454 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 455 CeedInt num_elem, elem_size, num_comp; 456 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 457 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 458 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 459 CeedInt layout_er[3]; 460 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 461 462 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 463 464 // Determine elem_dof relation 465 CeedVector index_vec; 466 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 467 CeedScalar *array; 468 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 469 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 470 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 471 CeedVector elem_dof; 472 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 473 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 474 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 475 const CeedScalar *elem_dof_a; 476 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 477 CeedCall(CeedVectorDestroy(&index_vec)); 478 479 // Determine i, j locations for element matrices 480 CeedInt count = 0; 481 for (CeedInt e = 0; e < num_elem; e++) { 482 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 483 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 484 for (CeedInt i = 0; i < elem_size; i++) { 485 for (CeedInt j = 0; j < elem_size; j++) { 486 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 487 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 488 489 const CeedInt row = elem_dof_a[elem_dof_index_row]; 490 const CeedInt col = elem_dof_a[elem_dof_index_col]; 491 492 rows[offset + count] = row; 493 cols[offset + count] = col; 494 count++; 495 } 496 } 497 } 498 } 499 } 500 if (count != local_num_entries) { 501 // LCOV_EXCL_START 502 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 503 // LCOV_EXCL_STOP 504 } 505 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 506 CeedCall(CeedVectorDestroy(&elem_dof)); 507 508 return CEED_ERROR_SUCCESS; 509 } 510 511 /** 512 @brief Assemble nonzero entries for non-composite operator 513 514 Users should generally use CeedOperatorLinearAssemble() 515 516 @param[in] op CeedOperator to assemble 517 @param[in] offset Offset for number of entries 518 @param[out] values Values to assemble into matrix 519 520 @return An error code: 0 - success, otherwise - failure 521 522 @ref Developer 523 **/ 524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 525 Ceed ceed; 526 bool is_composite; 527 CeedCall(CeedOperatorGetCeed(op, &ceed)); 528 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 529 530 if (is_composite) { 531 // LCOV_EXCL_START 532 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 533 // LCOV_EXCL_STOP 534 } 535 536 // Early exit for empty operator 537 { 538 CeedInt num_elem = 0; 539 540 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 541 if (num_elem == 0) return CEED_ERROR_SUCCESS; 542 } 543 544 if (op->LinearAssembleSingle) { 545 // Backend version 546 CeedCall(op->LinearAssembleSingle(op, offset, values)); 547 return CEED_ERROR_SUCCESS; 548 } else { 549 // Operator fallback 550 CeedOperator op_fallback; 551 552 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 553 if (op_fallback) { 554 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 555 return CEED_ERROR_SUCCESS; 556 } 557 } 558 559 // Assemble QFunction 560 CeedQFunction qf; 561 CeedCall(CeedOperatorGetQFunction(op, &qf)); 562 CeedVector assembled_qf; 563 CeedElemRestriction rstr_q; 564 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 565 CeedSize qf_length; 566 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 567 568 CeedInt num_input_fields, num_output_fields; 569 CeedOperatorField *input_fields; 570 CeedOperatorField *output_fields; 571 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 572 573 // Get assembly data 574 CeedOperatorAssemblyData data; 575 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 576 const CeedEvalMode **eval_modes_in, **eval_modes_out; 577 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 578 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 579 &eval_modes_out, NULL, NULL)); 580 CeedBasis *bases; 581 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 582 CeedBasis basis_in = bases[0]; 583 584 if (num_active_bases > 1) { 585 // LCOV_EXCL_START 586 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 587 // LCOV_EXCL_STOP 588 } 589 if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) { 590 // LCOV_EXCL_START 591 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 592 // LCOV_EXCL_STOP 593 } 594 595 CeedElemRestriction active_rstr; 596 CeedInt num_elem, elem_size, num_qpts, num_comp; 597 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 598 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 599 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 600 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 601 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 602 603 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 604 605 // loop over elements and put in data structure 606 const CeedScalar *interp_in, *grad_in; 607 CeedCall(CeedBasisGetInterp(basis_in, &interp_in)); 608 CeedCall(CeedBasisGetGrad(basis_in, &grad_in)); 609 610 const CeedScalar *assembled_qf_array; 611 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 612 613 CeedInt layout_qf[3]; 614 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 615 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 616 617 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 618 const CeedScalar **B_mats_in, **B_mats_out; 619 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 620 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 621 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 622 CeedScalar elem_mat[elem_size * elem_size]; 623 CeedInt count = 0; 624 CeedScalar *vals; 625 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 626 for (CeedInt e = 0; e < num_elem; e++) { 627 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 628 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 629 // Compute B^T*D 630 for (CeedInt n = 0; n < elem_size; n++) { 631 for (CeedInt q = 0; q < num_qpts; q++) { 632 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 633 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 634 CeedScalar sum = 0.0; 635 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 636 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 637 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 638 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 639 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 640 } 641 BTD_mat[btd_index] = sum; 642 } 643 } 644 } 645 // form element matrix itself (for each block component) 646 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 647 648 // put element matrix in coordinate data structure 649 for (CeedInt i = 0; i < elem_size; i++) { 650 for (CeedInt j = 0; j < elem_size; j++) { 651 vals[offset + count] = elem_mat[i * elem_size + j]; 652 count++; 653 } 654 } 655 } 656 } 657 } 658 if (count != local_num_entries) { 659 // LCOV_EXCL_START 660 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 661 // LCOV_EXCL_STOP 662 } 663 CeedCall(CeedVectorRestoreArray(values, &vals)); 664 665 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 666 CeedCall(CeedVectorDestroy(&assembled_qf)); 667 668 return CEED_ERROR_SUCCESS; 669 } 670 671 /** 672 @brief Count number of entries for assembled CeedOperator 673 674 @param[in] op CeedOperator to assemble 675 @param[out] num_entries Number of entries in assembled representation 676 677 @return An error code: 0 - success, otherwise - failure 678 679 @ref Utility 680 **/ 681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 682 bool is_composite; 683 CeedElemRestriction rstr; 684 CeedInt num_elem, elem_size, num_comp; 685 686 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 687 if (is_composite) { 688 // LCOV_EXCL_START 689 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 690 // LCOV_EXCL_STOP 691 } 692 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 693 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 694 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 695 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 696 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 697 698 return CEED_ERROR_SUCCESS; 699 } 700 701 /** 702 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 703 704 @param[in] op_fine Fine grid operator 705 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 706 @param[in] rstr_coarse Coarse grid restriction 707 @param[in] basis_coarse Coarse grid active vector basis 708 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 709 @param[out] op_coarse Coarse grid operator 710 @param[out] op_prolong Coarse to fine operator, or NULL 711 @param[out] op_restrict Fine to coarse operator, or NULL 712 713 @return An error code: 0 - success, otherwise - failure 714 715 @ref Developer 716 **/ 717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 718 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 719 Ceed ceed; 720 CeedVector mult_vec = NULL; 721 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 722 723 // Check for composite operator 724 bool is_composite; 725 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 726 if (is_composite) { 727 // LCOV_EXCL_START 728 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 729 // LCOV_EXCL_STOP 730 } 731 732 // Coarse Grid 733 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 734 CeedElemRestriction rstr_fine = NULL; 735 // -- Clone input fields 736 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 737 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 738 rstr_fine = op_fine->input_fields[i]->elem_rstr; 739 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 740 } else { 741 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 742 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 743 } 744 } 745 // -- Clone output fields 746 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 747 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 748 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 749 } else { 750 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 751 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 752 } 753 } 754 // -- Clone QFunctionAssemblyData 755 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 756 757 // Multiplicity vector 758 if (op_restrict || op_prolong) { 759 CeedVector mult_e_vec; 760 761 if (!p_mult_fine) { 762 // LCOV_EXCL_START 763 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 764 // LCOV_EXCL_STOP 765 } 766 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 767 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 768 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 769 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 770 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 771 CeedCall(CeedVectorDestroy(&mult_e_vec)); 772 CeedCall(CeedVectorReciprocal(mult_vec)); 773 } 774 775 // Clone name 776 bool has_name = op_fine->name; 777 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 778 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 779 780 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 781 if ((op_restrict || op_prolong) && !basis_c_to_f) { 782 // LCOV_EXCL_START 783 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis"); 784 // LCOV_EXCL_STOP 785 } 786 787 // Restriction/Prolongation Operators 788 CeedInt num_comp; 789 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 790 791 // Restriction 792 if (op_restrict) { 793 CeedInt *num_comp_r_data; 794 CeedQFunction qf_restrict; 795 CeedQFunctionContext ctx_r; 796 797 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 798 CeedCall(CeedCalloc(1, &num_comp_r_data)); 799 num_comp_r_data[0] = num_comp; 800 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 801 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 802 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 803 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 804 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 805 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 806 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 807 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 808 809 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 810 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 811 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 812 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 813 814 // Set name 815 char *restriction_name; 816 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 817 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 818 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 819 CeedCall(CeedFree(&restriction_name)); 820 821 // Check 822 CeedCall(CeedOperatorCheckReady(*op_restrict)); 823 824 // Cleanup 825 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 826 } 827 828 // Prolongation 829 if (op_prolong) { 830 CeedInt *num_comp_p_data; 831 CeedQFunction qf_prolong; 832 CeedQFunctionContext ctx_p; 833 834 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 835 CeedCall(CeedCalloc(1, &num_comp_p_data)); 836 num_comp_p_data[0] = num_comp; 837 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 838 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 839 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 840 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 841 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 842 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 843 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 844 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 845 846 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 847 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 848 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 849 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 850 851 // Set name 852 char *prolongation_name; 853 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 854 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 855 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 856 CeedCall(CeedFree(&prolongation_name)); 857 858 // Check 859 CeedCall(CeedOperatorCheckReady(*op_prolong)); 860 861 // Cleanup 862 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 863 } 864 865 // Check 866 CeedCall(CeedOperatorCheckReady(*op_coarse)); 867 868 // Cleanup 869 CeedCall(CeedVectorDestroy(&mult_vec)); 870 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 871 872 return CEED_ERROR_SUCCESS; 873 } 874 875 /** 876 @brief Build 1D mass matrix and Laplacian with perturbation 877 878 @param[in] interp_1d Interpolation matrix in one dimension 879 @param[in] grad_1d Gradient matrix in one dimension 880 @param[in] q_weight_1d Quadrature weights in one dimension 881 @param[in] P_1d Number of basis nodes in one dimension 882 @param[in] Q_1d Number of quadrature points in one dimension 883 @param[in] dim Dimension of basis 884 @param[out] mass Assembled mass matrix in one dimension 885 @param[out] laplace Assembled perturbed Laplacian in one dimension 886 887 @return An error code: 0 - success, otherwise - failure 888 889 @ref Developer 890 **/ 891 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, 892 CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 893 for (CeedInt i = 0; i < P_1d; i++) { 894 for (CeedInt j = 0; j < P_1d; j++) { 895 CeedScalar sum = 0.0; 896 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 897 mass[i + j * P_1d] = sum; 898 } 899 } 900 // -- Laplacian 901 for (CeedInt i = 0; i < P_1d; i++) { 902 for (CeedInt j = 0; j < P_1d; j++) { 903 CeedScalar sum = 0.0; 904 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 905 laplace[i + j * P_1d] = sum; 906 } 907 } 908 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 909 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 910 return CEED_ERROR_SUCCESS; 911 } 912 CeedPragmaOptimizeOn; 913 914 /// @} 915 916 /// ---------------------------------------------------------------------------- 917 /// CeedOperator Backend API 918 /// ---------------------------------------------------------------------------- 919 /// @addtogroup CeedOperatorBackend 920 /// @{ 921 922 /** 923 @brief Create object holding CeedQFunction assembly data for CeedOperator 924 925 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 926 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 927 928 @return An error code: 0 - success, otherwise - failure 929 930 @ref Backend 931 **/ 932 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 933 CeedCall(CeedCalloc(1, data)); 934 (*data)->ref_count = 1; 935 (*data)->ceed = ceed; 936 CeedCall(CeedReference(ceed)); 937 938 return CEED_ERROR_SUCCESS; 939 } 940 941 /** 942 @brief Increment the reference counter for a CeedQFunctionAssemblyData 943 944 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 945 946 @return An error code: 0 - success, otherwise - failure 947 948 @ref Backend 949 **/ 950 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 951 data->ref_count++; 952 return CEED_ERROR_SUCCESS; 953 } 954 955 /** 956 @brief Set re-use of CeedQFunctionAssemblyData 957 958 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 959 @param[in] reuse_data Boolean flag indicating data re-use 960 961 @return An error code: 0 - success, otherwise - failure 962 963 @ref Backend 964 **/ 965 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 966 data->reuse_data = reuse_data; 967 data->needs_data_update = true; 968 return CEED_ERROR_SUCCESS; 969 } 970 971 /** 972 @brief Mark QFunctionAssemblyData as stale 973 974 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 975 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 976 977 @return An error code: 0 - success, otherwise - failure 978 979 @ref Backend 980 **/ 981 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 982 data->needs_data_update = needs_data_update; 983 return CEED_ERROR_SUCCESS; 984 } 985 986 /** 987 @brief Determine if QFunctionAssemblyData needs update 988 989 @param[in] data CeedQFunctionAssemblyData to mark as stale 990 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 991 992 @return An error code: 0 - success, otherwise - failure 993 994 @ref Backend 995 **/ 996 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 997 *is_update_needed = !data->reuse_data || data->needs_data_update; 998 return CEED_ERROR_SUCCESS; 999 } 1000 1001 /** 1002 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1003 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1004 Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData. 1005 This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData. 1006 1007 @param[in] data CeedQFunctionAssemblyData to copy reference to 1008 @param[in,out] data_copy Variable to store copied reference 1009 1010 @return An error code: 0 - success, otherwise - failure 1011 1012 @ref Backend 1013 **/ 1014 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1015 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1016 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1017 *data_copy = data; 1018 return CEED_ERROR_SUCCESS; 1019 } 1020 1021 /** 1022 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1023 1024 @param[in] data CeedQFunctionAssemblyData to retrieve status 1025 @param[out] is_setup Boolean flag for setup status 1026 1027 @return An error code: 0 - success, otherwise - failure 1028 1029 @ref Backend 1030 **/ 1031 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1032 *is_setup = data->is_setup; 1033 return CEED_ERROR_SUCCESS; 1034 } 1035 1036 /** 1037 @brief Set internal objects for CeedQFunctionAssemblyData 1038 1039 @param[in,out] data CeedQFunctionAssemblyData to set objects 1040 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1041 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1042 1043 @return An error code: 0 - success, otherwise - failure 1044 1045 @ref Backend 1046 **/ 1047 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1048 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1049 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1050 1051 data->is_setup = true; 1052 return CEED_ERROR_SUCCESS; 1053 } 1054 1055 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1056 if (!data->is_setup) { 1057 // LCOV_EXCL_START 1058 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1059 // LCOV_EXCL_STOP 1060 } 1061 1062 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1063 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1064 1065 return CEED_ERROR_SUCCESS; 1066 } 1067 1068 /** 1069 @brief Destroy CeedQFunctionAssemblyData 1070 1071 @param[in,out] data CeedQFunctionAssemblyData to destroy 1072 1073 @return An error code: 0 - success, otherwise - failure 1074 1075 @ref Backend 1076 **/ 1077 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1078 if (!*data || --(*data)->ref_count > 0) { 1079 *data = NULL; 1080 return CEED_ERROR_SUCCESS; 1081 } 1082 CeedCall(CeedDestroy(&(*data)->ceed)); 1083 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1084 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1085 1086 CeedCall(CeedFree(data)); 1087 return CEED_ERROR_SUCCESS; 1088 } 1089 1090 /** 1091 @brief Get CeedOperatorAssemblyData 1092 1093 @param[in] op CeedOperator to assemble 1094 @param[out] data CeedQFunctionAssemblyData 1095 1096 @return An error code: 0 - success, otherwise - failure 1097 1098 @ref Backend 1099 **/ 1100 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1101 if (!op->op_assembled) { 1102 CeedOperatorAssemblyData data; 1103 1104 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1105 op->op_assembled = data; 1106 } 1107 *data = op->op_assembled; 1108 1109 return CEED_ERROR_SUCCESS; 1110 } 1111 1112 /** 1113 @brief Create object holding CeedOperator assembly data. 1114 1115 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1116 An array with references to the corresponding active CeedElemRestrictions is also stored. 1117 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1118 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1119 CeedEvalMode. 1120 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1121 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1122 1123 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1124 @param[in] op CeedOperator to be assembled 1125 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1126 1127 @return An error code: 0 - success, otherwise - failure 1128 1129 @ref Backend 1130 **/ 1131 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1132 CeedInt num_active_bases = 0; 1133 1134 // Allocate 1135 CeedCall(CeedCalloc(1, data)); 1136 (*data)->ceed = ceed; 1137 CeedCall(CeedReference(ceed)); 1138 1139 // Build OperatorAssembly data 1140 CeedQFunction qf; 1141 CeedQFunctionField *qf_fields; 1142 CeedOperatorField *op_fields; 1143 CeedInt num_input_fields; 1144 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1145 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1146 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1147 1148 // Determine active input basis 1149 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1150 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1151 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1152 1153 for (CeedInt i = 0; i < num_input_fields; i++) { 1154 CeedVector vec; 1155 1156 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1157 if (vec == CEED_VECTOR_ACTIVE) { 1158 CeedInt index = -1, dim = 1, num_components; 1159 CeedBasis basis_in = NULL; 1160 CeedEvalMode eval_mode; 1161 1162 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1163 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1164 CeedCall(CeedBasisGetNumComponents(basis_in, &num_components)); 1165 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1166 for (CeedInt i = 0; i < num_active_bases; i++) { 1167 if ((*data)->active_bases[i] == basis_in) index = i; 1168 } 1169 if (index == -1) { 1170 CeedElemRestriction elem_rstr_in; 1171 1172 index = num_active_bases; 1173 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1174 (*data)->active_bases[num_active_bases] = NULL; 1175 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1176 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1177 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1178 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1179 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1180 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1181 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1182 num_eval_modes_in[index] = 0; 1183 num_eval_modes_out[index] = 0; 1184 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1185 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1186 eval_modes_in[index] = NULL; 1187 eval_modes_out[index] = NULL; 1188 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1189 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1190 eval_mode_offsets_in[index] = NULL; 1191 eval_mode_offsets_out[index] = NULL; 1192 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1193 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1194 (*data)->assembled_bases_in[index] = NULL; 1195 (*data)->assembled_bases_out[index] = NULL; 1196 num_active_bases++; 1197 } 1198 switch (eval_mode) { 1199 case CEED_EVAL_NONE: 1200 case CEED_EVAL_INTERP: 1201 CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index])); 1202 CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index])); 1203 eval_modes_in[index][num_eval_modes_in[index]] = eval_mode; 1204 eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset; 1205 offset += num_components; 1206 num_eval_modes_in[index] += 1; 1207 break; 1208 case CEED_EVAL_GRAD: 1209 CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index])); 1210 CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index])); 1211 for (CeedInt d = 0; d < dim; d++) { 1212 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1213 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1214 offset += num_components; 1215 } 1216 num_eval_modes_in[index] += dim; 1217 break; 1218 case CEED_EVAL_WEIGHT: 1219 case CEED_EVAL_DIV: 1220 case CEED_EVAL_CURL: 1221 break; // Caught by QF Assembly 1222 } 1223 } 1224 } 1225 (*data)->num_eval_modes_in = num_eval_modes_in; 1226 (*data)->eval_modes_in = eval_modes_in; 1227 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1228 1229 // Determine active output basis 1230 CeedInt num_output_fields; 1231 1232 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1233 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1234 offset = 0; 1235 for (CeedInt i = 0; i < num_output_fields; i++) { 1236 CeedVector vec; 1237 1238 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1239 if (vec == CEED_VECTOR_ACTIVE) { 1240 CeedInt index = -1, dim = 1, num_components; 1241 CeedBasis basis_out = NULL; 1242 CeedEvalMode eval_mode; 1243 1244 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1245 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1246 CeedCall(CeedBasisGetNumComponents(basis_out, &num_components)); 1247 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1248 for (CeedInt i = 0; i < num_active_bases; i++) { 1249 if ((*data)->active_bases[i] == basis_out) index = i; 1250 } 1251 if (index == -1) { 1252 CeedElemRestriction elem_rstr_out; 1253 1254 index = num_active_bases; 1255 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1256 (*data)->active_bases[num_active_bases] = NULL; 1257 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1258 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1259 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1260 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1261 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1262 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1263 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1264 num_eval_modes_in[index] = 0; 1265 num_eval_modes_out[index] = 0; 1266 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1267 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1268 eval_modes_in[index] = NULL; 1269 eval_modes_out[index] = NULL; 1270 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1271 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1272 eval_mode_offsets_in[index] = NULL; 1273 eval_mode_offsets_out[index] = NULL; 1274 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1275 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1276 (*data)->assembled_bases_in[index] = NULL; 1277 (*data)->assembled_bases_out[index] = NULL; 1278 num_active_bases++; 1279 } 1280 switch (eval_mode) { 1281 case CEED_EVAL_NONE: 1282 case CEED_EVAL_INTERP: 1283 CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index])); 1284 CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index])); 1285 eval_modes_out[index][num_eval_modes_out[index]] = eval_mode; 1286 eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset; 1287 offset += num_components; 1288 num_eval_modes_out[index] += 1; 1289 break; 1290 case CEED_EVAL_GRAD: 1291 CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index])); 1292 CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index])); 1293 for (CeedInt d = 0; d < dim; d++) { 1294 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1295 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1296 offset += num_components; 1297 } 1298 num_eval_modes_out[index] += dim; 1299 break; 1300 case CEED_EVAL_WEIGHT: 1301 case CEED_EVAL_DIV: 1302 case CEED_EVAL_CURL: 1303 break; // Caught by QF Assembly 1304 } 1305 } 1306 } 1307 (*data)->num_output_components = offset; 1308 (*data)->num_eval_modes_out = num_eval_modes_out; 1309 (*data)->eval_modes_out = eval_modes_out; 1310 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1311 (*data)->num_active_bases = num_active_bases; 1312 1313 return CEED_ERROR_SUCCESS; 1314 } 1315 1316 /** 1317 @brief Get CeedOperator CeedEvalModes for assembly. 1318 1319 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1320 1321 @param[in] data CeedOperatorAssemblyData 1322 @param[out] num_active_bases Total number of active bases 1323 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1324 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1325 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1326 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1327 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1328 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1329 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1330 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1331 including contributions of all active bases 1332 1333 @return An error code: 0 - success, otherwise - failure 1334 1335 1336 @ref Backend 1337 **/ 1338 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1339 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1340 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1341 if (num_active_bases) *num_active_bases = data->num_active_bases; 1342 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1343 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1344 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1345 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1346 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1347 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1348 if (num_output_components) *num_output_components = data->num_output_components; 1349 1350 return CEED_ERROR_SUCCESS; 1351 } 1352 1353 /** 1354 @brief Get CeedOperator CeedBasis data for assembly. 1355 1356 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1357 1358 @param[in] data CeedOperatorAssemblyData 1359 @param[out] num_active_bases Number of active bases, or NULL 1360 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1361 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1362 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1363 1364 @return An error code: 0 - success, otherwise - failure 1365 1366 @ref Backend 1367 **/ 1368 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1369 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1370 // Assemble B_in, B_out if needed 1371 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1372 CeedInt num_qpts; 1373 1374 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1375 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1376 CeedInt elem_size; 1377 CeedScalar *B_in = NULL, *identity = NULL; 1378 const CeedScalar *interp_in, *grad_in; 1379 bool has_eval_none = false; 1380 1381 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size)); 1382 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in)); 1383 1384 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1385 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1386 } 1387 if (has_eval_none) { 1388 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1389 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1390 identity[i * elem_size + i] = 1.0; 1391 } 1392 } 1393 CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in)); 1394 CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in)); 1395 1396 for (CeedInt q = 0; q < num_qpts; q++) { 1397 for (CeedInt n = 0; n < elem_size; n++) { 1398 CeedInt d_in = -1; 1399 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1400 const CeedInt qq = data->num_eval_modes_in[b] * q; 1401 const CeedScalar *B = NULL; 1402 1403 if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++; 1404 CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B); 1405 B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n]; 1406 } 1407 } 1408 } 1409 if (identity) CeedCall(CeedFree(identity)); 1410 data->assembled_bases_in[b] = B_in; 1411 } 1412 } 1413 1414 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1415 CeedInt num_qpts; 1416 1417 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1418 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1419 CeedInt elem_size; 1420 const CeedScalar *interp_out, *grad_out; 1421 bool has_eval_none = false; 1422 CeedScalar *B_out = NULL, *identity = NULL; 1423 1424 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size)); 1425 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out)); 1426 1427 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1428 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1429 } 1430 if (has_eval_none) { 1431 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1432 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1433 identity[i * elem_size + i] = 1.0; 1434 } 1435 } 1436 CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out)); 1437 CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out)); 1438 1439 for (CeedInt q = 0; q < num_qpts; q++) { 1440 for (CeedInt n = 0; n < elem_size; n++) { 1441 CeedInt d_out = -1; 1442 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1443 const CeedInt qq = data->num_eval_modes_out[b] * q; 1444 const CeedScalar *B = NULL; 1445 1446 if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++; 1447 CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B); 1448 B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n]; 1449 } 1450 } 1451 } 1452 if (identity) CeedCall(CeedFree(identity)); 1453 data->assembled_bases_out[b] = B_out; 1454 } 1455 } 1456 1457 // Pass out assembled data 1458 if (active_bases) *active_bases = data->active_bases; 1459 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1460 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1461 1462 return CEED_ERROR_SUCCESS; 1463 } 1464 1465 /** 1466 @brief Get CeedOperator CeedBasis data for assembly. 1467 1468 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1469 1470 @param[in] data CeedOperatorAssemblyData 1471 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1472 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1473 1474 @return An error code: 0 - success, otherwise - failure 1475 1476 @ref Backend 1477 **/ 1478 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1479 CeedElemRestriction **active_elem_rstrs) { 1480 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1481 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1482 1483 return CEED_ERROR_SUCCESS; 1484 } 1485 1486 /** 1487 @brief Destroy CeedOperatorAssemblyData 1488 1489 @param[in,out] data CeedOperatorAssemblyData to destroy 1490 1491 @return An error code: 0 - success, otherwise - failure 1492 1493 @ref Backend 1494 **/ 1495 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1496 if (!*data) { 1497 *data = NULL; 1498 return CEED_ERROR_SUCCESS; 1499 } 1500 CeedCall(CeedDestroy(&(*data)->ceed)); 1501 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1502 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1503 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1504 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1505 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1506 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1507 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1508 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1509 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1510 } 1511 CeedCall(CeedFree(&(*data)->active_bases)); 1512 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1513 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1514 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1515 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1516 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1517 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1518 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1519 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1520 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1521 1522 CeedCall(CeedFree(data)); 1523 return CEED_ERROR_SUCCESS; 1524 } 1525 1526 /// @} 1527 1528 /// ---------------------------------------------------------------------------- 1529 /// CeedOperator Public API 1530 /// ---------------------------------------------------------------------------- 1531 /// @addtogroup CeedOperatorUser 1532 /// @{ 1533 1534 /** 1535 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1536 1537 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1538 The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices 1539 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the 1540 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, 1541 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] 1542 and producing the output [dv_0, dv_1, v]. 1543 1544 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1545 1546 @param[in] op CeedOperator to assemble CeedQFunction 1547 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1548 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1549 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1550 1551 @return An error code: 0 - success, otherwise - failure 1552 1553 @ref User 1554 **/ 1555 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1556 CeedCall(CeedOperatorCheckReady(op)); 1557 1558 if (op->LinearAssembleQFunction) { 1559 // Backend version 1560 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1561 } else { 1562 // Operator fallback 1563 CeedOperator op_fallback; 1564 1565 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1566 if (op_fallback) { 1567 CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1568 } else { 1569 // LCOV_EXCL_START 1570 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1571 // LCOV_EXCL_STOP 1572 } 1573 } 1574 return CEED_ERROR_SUCCESS; 1575 } 1576 1577 /** 1578 @brief Assemble CeedQFunction and store result internally. 1579 Return copied references of stored data to the caller. 1580 Caller is responsible for ownership and destruction of the copied references. 1581 See also @ref CeedOperatorLinearAssembleQFunction 1582 1583 @param[in] op CeedOperator to assemble CeedQFunction 1584 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1585 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1586 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1587 1588 @return An error code: 0 - success, otherwise - failure 1589 1590 @ref User 1591 **/ 1592 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1593 CeedCall(CeedOperatorCheckReady(op)); 1594 1595 if (op->LinearAssembleQFunctionUpdate) { 1596 // Backend version 1597 bool qf_assembled_is_setup; 1598 CeedVector assembled_vec = NULL; 1599 CeedElemRestriction assembled_rstr = NULL; 1600 1601 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1602 if (qf_assembled_is_setup) { 1603 bool update_needed; 1604 1605 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1606 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1607 if (update_needed) { 1608 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1609 } 1610 } else { 1611 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1612 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1613 } 1614 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1615 1616 // Copy reference from internally held copy 1617 *assembled = NULL; 1618 *rstr = NULL; 1619 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1620 CeedCall(CeedVectorDestroy(&assembled_vec)); 1621 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1622 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1623 } else { 1624 // Operator fallback 1625 CeedOperator op_fallback; 1626 1627 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1628 if (op_fallback) { 1629 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1630 } else { 1631 // LCOV_EXCL_START 1632 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1633 // LCOV_EXCL_STOP 1634 } 1635 } 1636 1637 return CEED_ERROR_SUCCESS; 1638 } 1639 1640 /** 1641 @brief Assemble the diagonal of a square linear CeedOperator 1642 1643 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1644 1645 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1646 1647 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1648 1649 @param[in] op CeedOperator to assemble CeedQFunction 1650 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1651 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1652 1653 @return An error code: 0 - success, otherwise - failure 1654 1655 @ref User 1656 **/ 1657 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1658 bool is_composite; 1659 CeedCall(CeedOperatorCheckReady(op)); 1660 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1661 1662 CeedSize input_size = 0, output_size = 0; 1663 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1664 if (input_size != output_size) { 1665 // LCOV_EXCL_START 1666 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1667 // LCOV_EXCL_STOP 1668 } 1669 1670 // Early exit for empty operator 1671 if (!is_composite) { 1672 CeedInt num_elem = 0; 1673 1674 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1675 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1676 } 1677 1678 if (op->LinearAssembleDiagonal) { 1679 // Backend version 1680 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1681 return CEED_ERROR_SUCCESS; 1682 } else if (op->LinearAssembleAddDiagonal) { 1683 // Backend version with zeroing first 1684 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1685 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1686 return CEED_ERROR_SUCCESS; 1687 } else { 1688 // Operator fallback 1689 CeedOperator op_fallback; 1690 1691 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1692 if (op_fallback) { 1693 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1694 return CEED_ERROR_SUCCESS; 1695 } 1696 } 1697 // Default interface implementation 1698 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1699 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1700 1701 return CEED_ERROR_SUCCESS; 1702 } 1703 1704 /** 1705 @brief Assemble the diagonal of a square linear CeedOperator 1706 1707 This sums into a CeedVector the diagonal of a linear CeedOperator. 1708 1709 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1710 1711 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1712 1713 @param[in] op CeedOperator to assemble CeedQFunction 1714 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1715 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1716 1717 @return An error code: 0 - success, otherwise - failure 1718 1719 @ref User 1720 **/ 1721 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1722 bool is_composite; 1723 CeedCall(CeedOperatorCheckReady(op)); 1724 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1725 1726 CeedSize input_size = 0, output_size = 0; 1727 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1728 if (input_size != output_size) { 1729 // LCOV_EXCL_START 1730 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1731 // LCOV_EXCL_STOP 1732 } 1733 1734 // Early exit for empty operator 1735 if (!is_composite) { 1736 CeedInt num_elem = 0; 1737 1738 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1739 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1740 } 1741 1742 if (op->LinearAssembleAddDiagonal) { 1743 // Backend version 1744 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1745 return CEED_ERROR_SUCCESS; 1746 } else { 1747 // Operator fallback 1748 CeedOperator op_fallback; 1749 1750 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1751 if (op_fallback) { 1752 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1753 return CEED_ERROR_SUCCESS; 1754 } 1755 } 1756 // Default interface implementation 1757 if (is_composite) { 1758 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1759 } else { 1760 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1761 } 1762 1763 return CEED_ERROR_SUCCESS; 1764 } 1765 1766 /** 1767 @brief Assemble the point block diagonal of a square linear CeedOperator 1768 1769 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1770 1771 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1772 1773 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1774 1775 @param[in] op CeedOperator to assemble CeedQFunction 1776 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1777 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1778 component in]. 1779 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1780 1781 @return An error code: 0 - success, otherwise - failure 1782 1783 @ref User 1784 **/ 1785 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1786 bool is_composite; 1787 CeedCall(CeedOperatorCheckReady(op)); 1788 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1789 1790 CeedSize input_size = 0, output_size = 0; 1791 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1792 if (input_size != output_size) { 1793 // LCOV_EXCL_START 1794 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1795 // LCOV_EXCL_STOP 1796 } 1797 1798 // Early exit for empty operator 1799 if (!is_composite) { 1800 CeedInt num_elem = 0; 1801 1802 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1803 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1804 } 1805 1806 if (op->LinearAssemblePointBlockDiagonal) { 1807 // Backend version 1808 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1809 return CEED_ERROR_SUCCESS; 1810 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1811 // Backend version with zeroing first 1812 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1813 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1814 return CEED_ERROR_SUCCESS; 1815 } else { 1816 // Operator fallback 1817 CeedOperator op_fallback; 1818 1819 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1820 if (op_fallback) { 1821 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1822 return CEED_ERROR_SUCCESS; 1823 } 1824 } 1825 // Default interface implementation 1826 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1827 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1828 1829 return CEED_ERROR_SUCCESS; 1830 } 1831 1832 /** 1833 @brief Assemble the point block diagonal of a square linear CeedOperator 1834 1835 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1836 1837 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1838 1839 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1840 1841 @param[in] op CeedOperator to assemble CeedQFunction 1842 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1843 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1844 component in]. 1845 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1846 1847 @return An error code: 0 - success, otherwise - failure 1848 1849 @ref User 1850 **/ 1851 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1852 bool is_composite; 1853 CeedCall(CeedOperatorCheckReady(op)); 1854 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1855 1856 CeedSize input_size = 0, output_size = 0; 1857 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1858 if (input_size != output_size) { 1859 // LCOV_EXCL_START 1860 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1861 // LCOV_EXCL_STOP 1862 } 1863 1864 // Early exit for empty operator 1865 if (!is_composite) { 1866 CeedInt num_elem = 0; 1867 1868 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1869 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1870 } 1871 1872 if (op->LinearAssembleAddPointBlockDiagonal) { 1873 // Backend version 1874 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1875 return CEED_ERROR_SUCCESS; 1876 } else { 1877 // Operator fallback 1878 CeedOperator op_fallback; 1879 1880 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1881 if (op_fallback) { 1882 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1883 return CEED_ERROR_SUCCESS; 1884 } 1885 } 1886 // Default interface implementation 1887 if (is_composite) { 1888 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1889 } else { 1890 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1891 } 1892 1893 return CEED_ERROR_SUCCESS; 1894 } 1895 1896 /** 1897 @brief Fully assemble the nonzero pattern of a linear operator. 1898 1899 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1900 1901 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1902 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j) 1903 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1904 1905 This will generally be slow unless your operator is low-order. 1906 1907 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1908 1909 @param[in] op CeedOperator to assemble 1910 @param[out] num_entries Number of entries in coordinate nonzero pattern 1911 @param[out] rows Row number for each entry 1912 @param[out] cols Column number for each entry 1913 1914 @ref User 1915 **/ 1916 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1917 CeedInt num_suboperators, single_entries; 1918 CeedOperator *sub_operators; 1919 bool is_composite; 1920 CeedCall(CeedOperatorCheckReady(op)); 1921 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1922 1923 if (op->LinearAssembleSymbolic) { 1924 // Backend version 1925 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1926 return CEED_ERROR_SUCCESS; 1927 } else { 1928 // Operator fallback 1929 CeedOperator op_fallback; 1930 1931 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1932 if (op_fallback) { 1933 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1934 return CEED_ERROR_SUCCESS; 1935 } 1936 } 1937 1938 // Default interface implementation 1939 1940 // count entries and allocate rows, cols arrays 1941 *num_entries = 0; 1942 if (is_composite) { 1943 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1944 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1945 for (CeedInt k = 0; k < num_suboperators; ++k) { 1946 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1947 *num_entries += single_entries; 1948 } 1949 } else { 1950 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1951 *num_entries += single_entries; 1952 } 1953 CeedCall(CeedCalloc(*num_entries, rows)); 1954 CeedCall(CeedCalloc(*num_entries, cols)); 1955 1956 // assemble nonzero locations 1957 CeedInt offset = 0; 1958 if (is_composite) { 1959 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1960 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1961 for (CeedInt k = 0; k < num_suboperators; ++k) { 1962 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1963 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1964 offset += single_entries; 1965 } 1966 } else { 1967 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1968 } 1969 1970 return CEED_ERROR_SUCCESS; 1971 } 1972 1973 /** 1974 @brief Fully assemble the nonzero entries of a linear operator. 1975 1976 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1977 1978 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1979 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added, 1980 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1981 1982 This will generally be slow unless your operator is low-order. 1983 1984 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1985 1986 @param[in] op CeedOperator to assemble 1987 @param[out] values Values to assemble into matrix 1988 1989 @ref User 1990 **/ 1991 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1992 CeedInt num_suboperators, single_entries = 0; 1993 CeedOperator *sub_operators; 1994 bool is_composite; 1995 CeedCall(CeedOperatorCheckReady(op)); 1996 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1997 1998 // Early exit for empty operator 1999 if (!is_composite) { 2000 CeedInt num_elem = 0; 2001 2002 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2003 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2004 } 2005 2006 if (op->LinearAssemble) { 2007 // Backend version 2008 CeedCall(op->LinearAssemble(op, values)); 2009 return CEED_ERROR_SUCCESS; 2010 } else { 2011 // Operator fallback 2012 CeedOperator op_fallback; 2013 2014 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2015 if (op_fallback) { 2016 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2017 return CEED_ERROR_SUCCESS; 2018 } 2019 } 2020 2021 // Default interface implementation 2022 CeedInt offset = 0; 2023 CeedCall(CeedVectorSetValue(values, 0.0)); 2024 if (is_composite) { 2025 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2026 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2027 for (CeedInt k = 0; k < num_suboperators; k++) { 2028 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2029 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2030 offset += single_entries; 2031 } 2032 } else { 2033 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2034 } 2035 2036 return CEED_ERROR_SUCCESS; 2037 } 2038 2039 /** 2040 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2041 2042 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2043 2044 @param[in] op Composite CeedOperator 2045 @param[in] num_skip_indices Number of suboperators to skip 2046 @param[in] skip_indices Array of indices of suboperators to skip 2047 @param[out] mult Vector to store multiplicity (of size l_size) 2048 2049 @return An error code: 0 - success, otherwise - failure 2050 2051 @ref User 2052 **/ 2053 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2054 CeedCall(CeedOperatorCheckReady(op)); 2055 2056 Ceed ceed; 2057 CeedInt num_suboperators; 2058 CeedSize l_vec_len; 2059 CeedScalar *mult_array; 2060 CeedVector ones_l_vec; 2061 CeedElemRestriction elem_rstr; 2062 CeedOperator *sub_operators; 2063 2064 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2065 2066 // Zero mult vector 2067 CeedCall(CeedVectorSetValue(mult, 0.0)); 2068 2069 // Get suboperators 2070 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2071 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2072 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2073 2074 // Work vector 2075 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2076 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2077 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2078 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2079 2080 // Compute multiplicity across suboperators 2081 for (CeedInt i = 0; i < num_suboperators; i++) { 2082 const CeedScalar *sub_mult_array; 2083 CeedVector sub_mult_l_vec, ones_e_vec; 2084 2085 // -- Check for suboperator to skip 2086 for (CeedInt j = 0; j < num_skip_indices; j++) { 2087 if (skip_indices[j] == i) continue; 2088 } 2089 2090 // -- Sub operator multiplicity 2091 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2092 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2093 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2094 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2095 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2096 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2097 // ---- Flag every node present in the current suboperator 2098 for (CeedInt j = 0; j < l_vec_len; j++) { 2099 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2100 } 2101 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2102 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2103 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2104 } 2105 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2106 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2107 2108 return CEED_ERROR_SUCCESS; 2109 } 2110 2111 /** 2112 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2113 grid interpolation 2114 2115 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2116 2117 @param[in] op_fine Fine grid operator 2118 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2119 @param[in] rstr_coarse Coarse grid restriction 2120 @param[in] basis_coarse Coarse grid active vector basis 2121 @param[out] op_coarse Coarse grid operator 2122 @param[out] op_prolong Coarse to fine operator, or NULL 2123 @param[out] op_restrict Fine to coarse operator, or NULL 2124 2125 @return An error code: 0 - success, otherwise - failure 2126 2127 @ref User 2128 **/ 2129 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2130 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2131 CeedCall(CeedOperatorCheckReady(op_fine)); 2132 2133 // Build prolongation matrix, if required 2134 CeedBasis basis_c_to_f = NULL; 2135 if (op_prolong || op_restrict) { 2136 CeedBasis basis_fine; 2137 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2138 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2139 } 2140 2141 // Core code 2142 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2143 2144 return CEED_ERROR_SUCCESS; 2145 } 2146 2147 /** 2148 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2149 2150 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2151 2152 @param[in] op_fine Fine grid operator 2153 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2154 @param[in] rstr_coarse Coarse grid restriction 2155 @param[in] basis_coarse Coarse grid active vector basis 2156 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2157 @param[out] op_coarse Coarse grid operator 2158 @param[out] op_prolong Coarse to fine operator, or NULL 2159 @param[out] op_restrict Fine to coarse operator, or NULL 2160 2161 @return An error code: 0 - success, otherwise - failure 2162 2163 @ref User 2164 **/ 2165 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2166 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2167 CeedOperator *op_restrict) { 2168 CeedCall(CeedOperatorCheckReady(op_fine)); 2169 Ceed ceed; 2170 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2171 2172 // Check for compatible quadrature spaces 2173 CeedBasis basis_fine; 2174 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2175 CeedInt Q_f, Q_c; 2176 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2177 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2178 if (Q_f != Q_c) { 2179 // LCOV_EXCL_START 2180 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2181 // LCOV_EXCL_STOP 2182 } 2183 2184 // Create coarse to fine basis, if required 2185 CeedBasis basis_c_to_f = NULL; 2186 if (op_prolong || op_restrict) { 2187 // Check if interpolation matrix is provided 2188 if (!interp_c_to_f) { 2189 // LCOV_EXCL_START 2190 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2191 // LCOV_EXCL_STOP 2192 } 2193 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2194 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2195 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2196 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2197 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2198 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2199 CeedScalar *q_ref, *q_weight, *grad; 2200 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2201 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2202 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2203 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2204 CeedCall(CeedFree(&q_ref)); 2205 CeedCall(CeedFree(&q_weight)); 2206 CeedCall(CeedFree(&grad)); 2207 } 2208 2209 // Core code 2210 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2211 return CEED_ERROR_SUCCESS; 2212 } 2213 2214 /** 2215 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2216 2217 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2218 2219 @param[in] op_fine Fine grid operator 2220 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2221 @param[in] rstr_coarse Coarse grid restriction 2222 @param[in] basis_coarse Coarse grid active vector basis 2223 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2224 @param[out] op_coarse Coarse grid operator 2225 @param[out] op_prolong Coarse to fine operator, or NULL 2226 @param[out] op_restrict Fine to coarse operator, or NULL 2227 2228 @return An error code: 0 - success, otherwise - failure 2229 2230 @ref User 2231 **/ 2232 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2233 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2234 CeedOperator *op_restrict) { 2235 CeedCall(CeedOperatorCheckReady(op_fine)); 2236 Ceed ceed; 2237 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2238 2239 // Check for compatible quadrature spaces 2240 CeedBasis basis_fine; 2241 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2242 CeedInt Q_f, Q_c; 2243 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2244 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2245 if (Q_f != Q_c) { 2246 // LCOV_EXCL_START 2247 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2248 // LCOV_EXCL_STOP 2249 } 2250 2251 // Coarse to fine basis 2252 CeedBasis basis_c_to_f = NULL; 2253 if (op_prolong || op_restrict) { 2254 // Check if interpolation matrix is provided 2255 if (!interp_c_to_f) { 2256 // LCOV_EXCL_START 2257 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2258 // LCOV_EXCL_STOP 2259 } 2260 CeedElemTopology topo; 2261 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2262 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2263 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2264 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2265 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2266 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2267 CeedScalar *q_ref, *q_weight, *grad; 2268 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2269 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2270 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2271 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2272 CeedCall(CeedFree(&q_ref)); 2273 CeedCall(CeedFree(&q_weight)); 2274 CeedCall(CeedFree(&grad)); 2275 } 2276 2277 // Core code 2278 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2279 return CEED_ERROR_SUCCESS; 2280 } 2281 2282 /** 2283 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2284 2285 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2286 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V. 2287 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T 2288 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear. 2289 2290 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2291 2292 @param[in] op CeedOperator to create element inverses 2293 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2294 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2295 2296 @return An error code: 0 - success, otherwise - failure 2297 2298 @ref User 2299 **/ 2300 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2301 CeedCall(CeedOperatorCheckReady(op)); 2302 2303 if (op->CreateFDMElementInverse) { 2304 // Backend version 2305 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2306 return CEED_ERROR_SUCCESS; 2307 } else { 2308 // Operator fallback 2309 CeedOperator op_fallback; 2310 2311 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2312 if (op_fallback) { 2313 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2314 return CEED_ERROR_SUCCESS; 2315 } 2316 } 2317 2318 // Default interface implementation 2319 Ceed ceed, ceed_parent; 2320 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2321 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2322 ceed_parent = ceed_parent ? ceed_parent : ceed; 2323 CeedQFunction qf; 2324 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2325 2326 // Determine active input basis 2327 bool interp = false, grad = false; 2328 CeedBasis basis = NULL; 2329 CeedElemRestriction rstr = NULL; 2330 CeedOperatorField *op_fields; 2331 CeedQFunctionField *qf_fields; 2332 CeedInt num_input_fields; 2333 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2334 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2335 for (CeedInt i = 0; i < num_input_fields; i++) { 2336 CeedVector vec; 2337 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2338 if (vec == CEED_VECTOR_ACTIVE) { 2339 CeedEvalMode eval_mode; 2340 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2341 interp = interp || eval_mode == CEED_EVAL_INTERP; 2342 grad = grad || eval_mode == CEED_EVAL_GRAD; 2343 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2344 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2345 } 2346 } 2347 if (!basis) { 2348 // LCOV_EXCL_START 2349 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2350 // LCOV_EXCL_STOP 2351 } 2352 CeedSize l_size = 1; 2353 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2354 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2355 CeedCall(CeedBasisGetNumNodes(basis, &elem_size)); 2356 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2357 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2358 CeedCall(CeedBasisGetDimension(basis, &dim)); 2359 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2360 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2361 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2362 2363 // Build and diagonalize 1D Mass and Laplacian 2364 bool tensor_basis; 2365 CeedCall(CeedBasisIsTensor(basis, &tensor_basis)); 2366 if (!tensor_basis) { 2367 // LCOV_EXCL_START 2368 return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2369 // LCOV_EXCL_STOP 2370 } 2371 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2372 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2373 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2374 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2375 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2376 CeedCall(CeedCalloc(P_1d, &lambda)); 2377 // -- Build matrices 2378 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2379 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2380 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2381 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2382 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2383 2384 // -- Diagonalize 2385 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2386 CeedCall(CeedFree(&mass)); 2387 CeedCall(CeedFree(&laplace)); 2388 for (CeedInt i = 0; i < P_1d; i++) { 2389 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2390 } 2391 CeedCall(CeedFree(&x)); 2392 2393 // Assemble QFunction 2394 CeedVector assembled; 2395 CeedElemRestriction rstr_qf; 2396 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2397 CeedInt layout[3]; 2398 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2399 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2400 CeedScalar max_norm = 0; 2401 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2402 2403 // Calculate element averages 2404 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2405 CeedScalar *elem_avg; 2406 const CeedScalar *assembled_array, *q_weight_array; 2407 CeedVector q_weight; 2408 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2409 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2410 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2411 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2412 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2413 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2414 for (CeedInt e = 0; e < num_elem; e++) { 2415 CeedInt count = 0; 2416 for (CeedInt q = 0; q < num_qpts; q++) { 2417 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2418 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2419 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2420 count++; 2421 } 2422 } 2423 } 2424 if (count) { 2425 elem_avg[e] /= count; 2426 } else { 2427 elem_avg[e] = 1.0; 2428 } 2429 } 2430 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2431 CeedCall(CeedVectorDestroy(&assembled)); 2432 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2433 CeedCall(CeedVectorDestroy(&q_weight)); 2434 2435 // Build FDM diagonal 2436 CeedVector q_data; 2437 CeedScalar *q_data_array, *fdm_diagonal; 2438 CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal)); 2439 const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON; 2440 for (CeedInt c = 0; c < num_comp; c++) { 2441 for (CeedInt n = 0; n < elem_size; n++) { 2442 if (interp) fdm_diagonal[c * elem_size + n] = 1.0; 2443 if (grad) { 2444 for (CeedInt d = 0; d < dim; d++) { 2445 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2446 fdm_diagonal[c * elem_size + n] += lambda[i]; 2447 } 2448 } 2449 if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound; 2450 } 2451 } 2452 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data)); 2453 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2454 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2455 for (CeedInt e = 0; e < num_elem; e++) { 2456 for (CeedInt c = 0; c < num_comp; c++) { 2457 for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]); 2458 } 2459 } 2460 CeedCall(CeedFree(&elem_avg)); 2461 CeedCall(CeedFree(&fdm_diagonal)); 2462 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2463 2464 // Setup FDM operator 2465 // -- Basis 2466 CeedBasis fdm_basis; 2467 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2468 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2469 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2470 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2471 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2472 CeedCall(CeedFree(&fdm_interp)); 2473 CeedCall(CeedFree(&grad_dummy)); 2474 CeedCall(CeedFree(&q_ref_dummy)); 2475 CeedCall(CeedFree(&q_weight_dummy)); 2476 CeedCall(CeedFree(&lambda)); 2477 2478 // -- Restriction 2479 CeedElemRestriction rstr_qd_i; 2480 CeedInt strides[3] = {1, elem_size, elem_size * num_comp}; 2481 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i)); 2482 // -- QFunction 2483 CeedQFunction qf_fdm; 2484 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2485 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2486 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2487 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2488 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2489 // -- QFunction context 2490 CeedInt *num_comp_data; 2491 CeedCall(CeedCalloc(1, &num_comp_data)); 2492 num_comp_data[0] = num_comp; 2493 CeedQFunctionContext ctx_fdm; 2494 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2495 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2496 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2497 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2498 // -- Operator 2499 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2500 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2501 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2502 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2503 2504 // Cleanup 2505 CeedCall(CeedVectorDestroy(&q_data)); 2506 CeedCall(CeedBasisDestroy(&fdm_basis)); 2507 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2508 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2509 2510 return CEED_ERROR_SUCCESS; 2511 } 2512 2513 /// @} 2514