1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <assert.h> 9 #include <ceed-impl.h> 10 #include <ceed/backend.h> 11 #include <ceed/ceed.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 bool is_composite; 84 Ceed ceed_fallback; 85 86 // Check not already created 87 if (op->op_fallback) return CEED_ERROR_SUCCESS; 88 89 // Fallback Ceed 90 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 91 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 92 93 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 94 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 95 96 // Clone Op 97 CeedOperator op_fallback; 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 117 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 118 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 119 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 120 op->input_fields[i]->vec)); 121 } 122 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 123 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 124 op->output_fields[i]->vec)); 125 } 126 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 127 if (op_fallback->num_qpts == 0) { 128 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 129 } 130 // Cleanup 131 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 132 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 133 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 134 } 135 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 136 CeedCall(CeedOperatorCheckReady(op_fallback)); 137 op->op_fallback = op_fallback; 138 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 144 145 @param[in] op CeedOperator to retrieve fallback for 146 @param[out] op_fallback Fallback CeedOperator 147 148 @return An error code: 0 - success, otherwise - failure 149 150 @ref Developer 151 **/ 152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 153 // Create if needed 154 if (!op->op_fallback) { 155 CeedCall(CeedOperatorCreateFallback(op)); 156 } 157 if (op->op_fallback) { 158 bool is_debug; 159 160 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 161 if (is_debug) { 162 Ceed ceed, ceed_fallback; 163 const char *resource, *resource_fallback; 164 165 CeedCall(CeedOperatorGetCeed(op, &ceed)); 166 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 167 CeedCall(CeedGetResource(ceed, &resource)); 168 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 169 170 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 171 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 172 op->op_fallback); 173 } 174 } 175 *op_fallback = op->op_fallback; 176 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /** 181 @brief Select correct basis matrix pointer based on CeedEvalMode 182 183 @param[in] eval_mode Current basis evaluation mode 184 @param[in] identity Pointer to identity matrix 185 @param[in] interp Pointer to interpolation matrix 186 @param[in] grad Pointer to gradient matrix 187 @param[out] basis_ptr Basis pointer to set 188 189 @ref Developer 190 **/ 191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad, 192 const CeedScalar **basis_ptr) { 193 switch (eval_mode) { 194 case CEED_EVAL_NONE: 195 *basis_ptr = identity; 196 break; 197 case CEED_EVAL_INTERP: 198 *basis_ptr = interp; 199 break; 200 case CEED_EVAL_GRAD: 201 *basis_ptr = grad; 202 break; 203 case CEED_EVAL_WEIGHT: 204 case CEED_EVAL_DIV: 205 case CEED_EVAL_CURL: 206 break; // Caught by QF Assembly 207 } 208 assert(*basis_ptr != NULL); 209 } 210 211 /** 212 @brief Create point block restriction for active operator field 213 214 @param[in] rstr Original CeedElemRestriction for active field 215 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 216 217 @return An error code: 0 - success, otherwise - failure 218 219 @ref Developer 220 **/ 221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 222 Ceed ceed; 223 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 224 const CeedInt *offsets; 225 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 226 227 // Expand offsets 228 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 229 CeedSize l_size; 230 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 231 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 232 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 233 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 234 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 235 CeedInt shift = num_comp; 236 if (comp_stride != 1) shift *= num_comp; 237 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 238 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 239 pointblock_offsets[i] = offsets[i] * shift; 240 } 241 242 // Create new restriction 243 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 244 pointblock_offsets, pointblock_rstr)); 245 246 // Cleanup 247 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 248 249 return CEED_ERROR_SUCCESS; 250 } 251 252 /** 253 @brief Core logic for assembling operator diagonal or point block diagonal 254 255 @param[in] op CeedOperator to assemble point block diagonal 256 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 257 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 258 @param[out] assembled CeedVector to store assembled diagonal 259 260 @return An error code: 0 - success, otherwise - failure 261 262 @ref Developer 263 **/ 264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 265 Ceed ceed; 266 CeedCall(CeedOperatorGetCeed(op, &ceed)); 267 268 // Assemble QFunction 269 CeedQFunction qf; 270 const CeedScalar *assembled_qf_array; 271 CeedVector assembled_qf; 272 CeedElemRestriction assembled_elem_rstr; 273 CeedInt num_input_fields, num_output_fields; 274 CeedInt layout[3]; 275 276 CeedCall(CeedOperatorGetQFunction(op, &qf)); 277 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 278 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 279 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout)); 280 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 281 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 282 283 // Get assembly data 284 CeedOperatorAssemblyData data; 285 const CeedEvalMode **eval_modes_in, **eval_modes_out; 286 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 287 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 288 CeedBasis *active_bases; 289 CeedElemRestriction *active_elem_rstrs; 290 291 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 292 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 293 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components)); 294 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL)); 295 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs)); 296 297 // Loop over all active bases 298 for (CeedInt b = 0; b < num_active_bases; b++) { 299 // Assemble point block diagonal restriction, if needed 300 CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b]; 301 302 if (is_pointblock) { 303 CeedElemRestriction point_block_elem_rstr; 304 305 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr)); 306 diag_elem_rstr = point_block_elem_rstr; 307 } 308 309 // Create diagonal vector 310 CeedVector elem_diag; 311 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array; 315 CeedInt num_elem, num_nodes, num_qpts, num_components; 316 317 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 318 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 319 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 320 CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes)); 321 CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components)); 322 CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts)); 323 324 // Basis matrices 325 const CeedScalar *interp, *grad; 326 CeedScalar *identity = NULL; 327 bool has_eval_none = false; 328 for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) { 329 has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE); 330 } 331 for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) { 332 has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE); 333 } 334 if (has_eval_none) { 335 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 336 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 337 } 338 CeedCall(CeedBasisGetInterp(active_bases[b], &interp)); 339 CeedCall(CeedBasisGetGrad(active_bases[b], &grad)); 340 // Compute the diagonal of B^T D B 341 // Each element 342 for (CeedInt e = 0; e < num_elem; e++) { 343 CeedInt d_out = -1; 344 // Each basis eval mode pair 345 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) { 346 const CeedScalar *B_t = NULL; 347 if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1; 348 CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t); 349 CeedInt d_in = -1; 350 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) { 351 const CeedScalar *B = NULL; 352 if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1; 353 CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B); 354 // Each component 355 for (CeedInt c_out = 0; c_out < num_components; c_out++) { 356 // Each qpt/node pair 357 for (CeedInt q = 0; q < num_qpts; q++) { 358 if (is_pointblock) { 359 // Point Block Diagonal 360 for (CeedInt c_in = 0; c_in < num_components; c_in++) { 361 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 362 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 363 for (CeedInt n = 0; n < num_nodes; n++) { 364 elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] += 365 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 366 } 367 } 368 } else { 369 // Diagonal Only 370 const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out; 371 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]]; 372 for (CeedInt n = 0; n < num_nodes; n++) { 373 elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 374 } 375 } 376 } 377 } 378 } 379 } 380 } 381 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 382 383 // Assemble local operator diagonal 384 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 385 386 // Cleanup 387 if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 388 CeedCall(CeedVectorDestroy(&elem_diag)); 389 CeedCall(CeedFree(&identity)); 390 } 391 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 392 CeedCall(CeedVectorDestroy(&assembled_qf)); 393 394 return CEED_ERROR_SUCCESS; 395 } 396 397 /** 398 @brief Core logic for assembling composite operator diagonal 399 400 @param[in] op CeedOperator to assemble point block diagonal 401 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 402 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 403 @param[out] assembled CeedVector to store assembled diagonal 404 405 @return An error code: 0 - success, otherwise - failure 406 407 @ref Developer 408 **/ 409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 410 CeedVector assembled) { 411 CeedInt num_sub; 412 CeedOperator *suboperators; 413 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 414 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 415 for (CeedInt i = 0; i < num_sub; i++) { 416 if (is_pointblock) { 417 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 418 } else { 419 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 420 } 421 } 422 return CEED_ERROR_SUCCESS; 423 } 424 425 /** 426 @brief Build nonzero pattern for non-composite operator 427 428 Users should generally use CeedOperatorLinearAssembleSymbolic() 429 430 @param[in] op CeedOperator to assemble nonzero pattern 431 @param[in] offset Offset for number of entries 432 @param[out] rows Row number for each entry 433 @param[out] cols Column number for each entry 434 435 @return An error code: 0 - success, otherwise - failure 436 437 @ref Developer 438 **/ 439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 440 Ceed ceed; 441 bool is_composite; 442 CeedCall(CeedOperatorGetCeed(op, &ceed)); 443 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 444 445 if (is_composite) { 446 // LCOV_EXCL_START 447 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 448 // LCOV_EXCL_STOP 449 } 450 451 CeedSize num_nodes; 452 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 453 CeedElemRestriction rstr_in; 454 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 455 CeedInt num_elem, elem_size, num_comp; 456 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 457 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 458 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 459 CeedInt layout_er[3]; 460 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 461 462 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 463 464 // Determine elem_dof relation 465 CeedVector index_vec; 466 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 467 CeedScalar *array; 468 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 469 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 470 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 471 CeedVector elem_dof; 472 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 473 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 474 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 475 const CeedScalar *elem_dof_a; 476 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 477 CeedCall(CeedVectorDestroy(&index_vec)); 478 479 // Determine i, j locations for element matrices 480 CeedInt count = 0; 481 for (CeedInt e = 0; e < num_elem; e++) { 482 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 483 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 484 for (CeedInt i = 0; i < elem_size; i++) { 485 for (CeedInt j = 0; j < elem_size; j++) { 486 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 487 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 488 489 const CeedInt row = elem_dof_a[elem_dof_index_row]; 490 const CeedInt col = elem_dof_a[elem_dof_index_col]; 491 492 rows[offset + count] = row; 493 cols[offset + count] = col; 494 count++; 495 } 496 } 497 } 498 } 499 } 500 if (count != local_num_entries) { 501 // LCOV_EXCL_START 502 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 503 // LCOV_EXCL_STOP 504 } 505 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 506 CeedCall(CeedVectorDestroy(&elem_dof)); 507 508 return CEED_ERROR_SUCCESS; 509 } 510 511 /** 512 @brief Assemble nonzero entries for non-composite operator 513 514 Users should generally use CeedOperatorLinearAssemble() 515 516 @param[in] op CeedOperator to assemble 517 @param[in] offset Offset for number of entries 518 @param[out] values Values to assemble into matrix 519 520 @return An error code: 0 - success, otherwise - failure 521 522 @ref Developer 523 **/ 524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 525 Ceed ceed; 526 bool is_composite; 527 CeedCall(CeedOperatorGetCeed(op, &ceed)); 528 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 529 530 if (is_composite) { 531 // LCOV_EXCL_START 532 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 533 // LCOV_EXCL_STOP 534 } 535 536 // Early exit for empty operator 537 { 538 CeedInt num_elem = 0; 539 540 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 541 if (num_elem == 0) return CEED_ERROR_SUCCESS; 542 } 543 544 if (op->LinearAssembleSingle) { 545 // Backend version 546 CeedCall(op->LinearAssembleSingle(op, offset, values)); 547 return CEED_ERROR_SUCCESS; 548 } else { 549 // Operator fallback 550 CeedOperator op_fallback; 551 552 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 553 if (op_fallback) { 554 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 555 return CEED_ERROR_SUCCESS; 556 } 557 } 558 559 // Assemble QFunction 560 CeedQFunction qf; 561 CeedCall(CeedOperatorGetQFunction(op, &qf)); 562 CeedVector assembled_qf; 563 CeedElemRestriction rstr_q; 564 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 565 CeedSize qf_length; 566 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 567 568 CeedInt num_input_fields, num_output_fields; 569 CeedOperatorField *input_fields; 570 CeedOperatorField *output_fields; 571 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 572 573 // Get assembly data 574 CeedOperatorAssemblyData data; 575 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 576 const CeedEvalMode **eval_modes_in, **eval_modes_out; 577 CeedInt *num_eval_modes_in, *num_eval_modes_out, num_active_bases; 578 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out, 579 &eval_modes_out, NULL, NULL)); 580 CeedBasis *bases; 581 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL)); 582 CeedBasis basis_in = bases[0]; 583 584 if (num_active_bases > 1) { 585 // LCOV_EXCL_START 586 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases"); 587 // LCOV_EXCL_STOP 588 } 589 if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) { 590 // LCOV_EXCL_START 591 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 592 // LCOV_EXCL_STOP 593 } 594 595 CeedElemRestriction active_rstr; 596 CeedInt num_elem, elem_size, num_qpts, num_comp; 597 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 598 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 599 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 600 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 601 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 602 603 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 604 605 // loop over elements and put in data structure 606 const CeedScalar *interp_in, *grad_in; 607 CeedCall(CeedBasisGetInterp(basis_in, &interp_in)); 608 CeedCall(CeedBasisGetGrad(basis_in, &grad_in)); 609 610 const CeedScalar *assembled_qf_array; 611 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 612 613 CeedInt layout_qf[3]; 614 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 615 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 616 617 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 618 const CeedScalar **B_mats_in, **B_mats_out; 619 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out)); 620 const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0]; 621 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]]; 622 CeedScalar elem_mat[elem_size * elem_size]; 623 CeedInt count = 0; 624 CeedScalar *vals; 625 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 626 for (CeedInt e = 0; e < num_elem; e++) { 627 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 628 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 629 // Compute B^T*D 630 for (CeedInt n = 0; n < elem_size; n++) { 631 for (CeedInt q = 0; q < num_qpts; q++) { 632 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 633 const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in); 634 CeedScalar sum = 0.0; 635 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 636 const CeedInt b_out_index = (num_eval_modes_out[0] * q + e_out) * elem_size + n; 637 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out; 638 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 639 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 640 } 641 BTD_mat[btd_index] = sum; 642 } 643 } 644 } 645 // form element matrix itself (for each block component) 646 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0])); 647 648 // put element matrix in coordinate data structure 649 for (CeedInt i = 0; i < elem_size; i++) { 650 for (CeedInt j = 0; j < elem_size; j++) { 651 vals[offset + count] = elem_mat[i * elem_size + j]; 652 count++; 653 } 654 } 655 } 656 } 657 } 658 if (count != local_num_entries) { 659 // LCOV_EXCL_START 660 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 661 // LCOV_EXCL_STOP 662 } 663 CeedCall(CeedVectorRestoreArray(values, &vals)); 664 665 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 666 CeedCall(CeedVectorDestroy(&assembled_qf)); 667 668 return CEED_ERROR_SUCCESS; 669 } 670 671 /** 672 @brief Count number of entries for assembled CeedOperator 673 674 @param[in] op CeedOperator to assemble 675 @param[out] num_entries Number of entries in assembled representation 676 677 @return An error code: 0 - success, otherwise - failure 678 679 @ref Utility 680 **/ 681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 682 bool is_composite; 683 CeedElemRestriction rstr; 684 CeedInt num_elem, elem_size, num_comp; 685 686 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 687 if (is_composite) { 688 // LCOV_EXCL_START 689 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 690 // LCOV_EXCL_STOP 691 } 692 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 693 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 694 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 695 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 696 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 697 698 return CEED_ERROR_SUCCESS; 699 } 700 701 /** 702 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 703 704 @param[in] op_fine Fine grid operator 705 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 706 @param[in] rstr_coarse Coarse grid restriction 707 @param[in] basis_coarse Coarse grid active vector basis 708 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 709 @param[out] op_coarse Coarse grid operator 710 @param[out] op_prolong Coarse to fine operator, or NULL 711 @param[out] op_restrict Fine to coarse operator, or NULL 712 713 @return An error code: 0 - success, otherwise - failure 714 715 @ref Developer 716 **/ 717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 718 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 719 Ceed ceed; 720 CeedVector mult_vec = NULL; 721 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 722 723 // Check for composite operator 724 bool is_composite; 725 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 726 if (is_composite) { 727 // LCOV_EXCL_START 728 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 729 // LCOV_EXCL_STOP 730 } 731 732 // Coarse Grid 733 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 734 CeedElemRestriction rstr_fine = NULL; 735 // -- Clone input fields 736 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 737 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 738 rstr_fine = op_fine->input_fields[i]->elem_rstr; 739 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 740 } else { 741 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 742 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 743 } 744 } 745 // -- Clone output fields 746 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 747 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 748 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 749 } else { 750 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 751 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 752 } 753 } 754 // -- Clone QFunctionAssemblyData 755 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 756 757 // Multiplicity vector 758 if (op_restrict || op_prolong) { 759 CeedVector mult_e_vec; 760 761 if (!p_mult_fine) { 762 // LCOV_EXCL_START 763 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 764 // LCOV_EXCL_STOP 765 } 766 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 767 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 768 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 769 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 770 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 771 CeedCall(CeedVectorDestroy(&mult_e_vec)); 772 CeedCall(CeedVectorReciprocal(mult_vec)); 773 } 774 775 // Clone name 776 bool has_name = op_fine->name; 777 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 778 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 779 780 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 781 if ((op_restrict || op_prolong) && !basis_c_to_f) { 782 // LCOV_EXCL_START 783 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis"); 784 // LCOV_EXCL_STOP 785 } 786 787 // Restriction/Prolongation Operators 788 CeedInt num_comp; 789 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 790 791 // Restriction 792 if (op_restrict) { 793 CeedInt *num_comp_r_data; 794 CeedQFunction qf_restrict; 795 CeedQFunctionContext ctx_r; 796 797 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 798 CeedCall(CeedCalloc(1, &num_comp_r_data)); 799 num_comp_r_data[0] = num_comp; 800 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 801 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 802 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 803 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 804 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 805 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 806 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 807 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 808 809 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 810 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 811 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 812 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 813 814 // Set name 815 char *restriction_name; 816 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 817 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 818 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 819 CeedCall(CeedFree(&restriction_name)); 820 821 // Check 822 CeedCall(CeedOperatorCheckReady(*op_restrict)); 823 824 // Cleanup 825 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 826 } 827 828 // Prolongation 829 if (op_prolong) { 830 CeedInt *num_comp_p_data; 831 CeedQFunction qf_prolong; 832 CeedQFunctionContext ctx_p; 833 834 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 835 CeedCall(CeedCalloc(1, &num_comp_p_data)); 836 num_comp_p_data[0] = num_comp; 837 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 838 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 839 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 840 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 841 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 842 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 843 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 844 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 845 846 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 847 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 848 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 849 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 850 851 // Set name 852 char *prolongation_name; 853 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 854 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 855 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 856 CeedCall(CeedFree(&prolongation_name)); 857 858 // Check 859 CeedCall(CeedOperatorCheckReady(*op_prolong)); 860 861 // Cleanup 862 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 863 } 864 865 // Check 866 CeedCall(CeedOperatorCheckReady(*op_coarse)); 867 868 // Cleanup 869 CeedCall(CeedVectorDestroy(&mult_vec)); 870 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 871 872 return CEED_ERROR_SUCCESS; 873 } 874 875 /** 876 @brief Build 1D mass matrix and Laplacian with perturbation 877 878 @param[in] interp_1d Interpolation matrix in one dimension 879 @param[in] grad_1d Gradient matrix in one dimension 880 @param[in] q_weight_1d Quadrature weights in one dimension 881 @param[in] P_1d Number of basis nodes in one dimension 882 @param[in] Q_1d Number of quadrature points in one dimension 883 @param[in] dim Dimension of basis 884 @param[out] mass Assembled mass matrix in one dimension 885 @param[out] laplace Assembled perturbed Laplacian in one dimension 886 887 @return An error code: 0 - success, otherwise - failure 888 889 @ref Developer 890 **/ 891 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, 892 CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 893 for (CeedInt i = 0; i < P_1d; i++) { 894 for (CeedInt j = 0; j < P_1d; j++) { 895 CeedScalar sum = 0.0; 896 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 897 mass[i + j * P_1d] = sum; 898 } 899 } 900 // -- Laplacian 901 for (CeedInt i = 0; i < P_1d; i++) { 902 for (CeedInt j = 0; j < P_1d; j++) { 903 CeedScalar sum = 0.0; 904 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 905 laplace[i + j * P_1d] = sum; 906 } 907 } 908 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 909 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 910 return CEED_ERROR_SUCCESS; 911 } 912 CeedPragmaOptimizeOn; 913 914 /// @} 915 916 /// ---------------------------------------------------------------------------- 917 /// CeedOperator Backend API 918 /// ---------------------------------------------------------------------------- 919 /// @addtogroup CeedOperatorBackend 920 /// @{ 921 922 /** 923 @brief Create object holding CeedQFunction assembly data for CeedOperator 924 925 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 926 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 927 928 @return An error code: 0 - success, otherwise - failure 929 930 @ref Backend 931 **/ 932 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 933 CeedCall(CeedCalloc(1, data)); 934 (*data)->ref_count = 1; 935 (*data)->ceed = ceed; 936 CeedCall(CeedReference(ceed)); 937 938 return CEED_ERROR_SUCCESS; 939 } 940 941 /** 942 @brief Increment the reference counter for a CeedQFunctionAssemblyData 943 944 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 945 946 @return An error code: 0 - success, otherwise - failure 947 948 @ref Backend 949 **/ 950 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 951 data->ref_count++; 952 return CEED_ERROR_SUCCESS; 953 } 954 955 /** 956 @brief Set re-use of CeedQFunctionAssemblyData 957 958 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 959 @param[in] reuse_data Boolean flag indicating data re-use 960 961 @return An error code: 0 - success, otherwise - failure 962 963 @ref Backend 964 **/ 965 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 966 data->reuse_data = reuse_data; 967 data->needs_data_update = true; 968 return CEED_ERROR_SUCCESS; 969 } 970 971 /** 972 @brief Mark QFunctionAssemblyData as stale 973 974 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 975 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 976 977 @return An error code: 0 - success, otherwise - failure 978 979 @ref Backend 980 **/ 981 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 982 data->needs_data_update = needs_data_update; 983 return CEED_ERROR_SUCCESS; 984 } 985 986 /** 987 @brief Determine if QFunctionAssemblyData needs update 988 989 @param[in] data CeedQFunctionAssemblyData to mark as stale 990 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 991 992 @return An error code: 0 - success, otherwise - failure 993 994 @ref Backend 995 **/ 996 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 997 *is_update_needed = !data->reuse_data || data->needs_data_update; 998 return CEED_ERROR_SUCCESS; 999 } 1000 1001 /** 1002 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1003 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1004 Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData. 1005 This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData. 1006 1007 @param[in] data CeedQFunctionAssemblyData to copy reference to 1008 @param[in,out] data_copy Variable to store copied reference 1009 1010 @return An error code: 0 - success, otherwise - failure 1011 1012 @ref Backend 1013 **/ 1014 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1015 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1016 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1017 *data_copy = data; 1018 return CEED_ERROR_SUCCESS; 1019 } 1020 1021 /** 1022 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1023 1024 @param[in] data CeedQFunctionAssemblyData to retrieve status 1025 @param[out] is_setup Boolean flag for setup status 1026 1027 @return An error code: 0 - success, otherwise - failure 1028 1029 @ref Backend 1030 **/ 1031 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1032 *is_setup = data->is_setup; 1033 return CEED_ERROR_SUCCESS; 1034 } 1035 1036 /** 1037 @brief Set internal objects for CeedQFunctionAssemblyData 1038 1039 @param[in,out] data CeedQFunctionAssemblyData to set objects 1040 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1041 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1042 1043 @return An error code: 0 - success, otherwise - failure 1044 1045 @ref Backend 1046 **/ 1047 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1048 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1049 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1050 1051 data->is_setup = true; 1052 return CEED_ERROR_SUCCESS; 1053 } 1054 1055 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1056 if (!data->is_setup) { 1057 // LCOV_EXCL_START 1058 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1059 // LCOV_EXCL_STOP 1060 } 1061 1062 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1063 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1064 1065 return CEED_ERROR_SUCCESS; 1066 } 1067 1068 /** 1069 @brief Destroy CeedQFunctionAssemblyData 1070 1071 @param[in,out] data CeedQFunctionAssemblyData to destroy 1072 1073 @return An error code: 0 - success, otherwise - failure 1074 1075 @ref Backend 1076 **/ 1077 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1078 if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS; 1079 1080 CeedCall(CeedDestroy(&(*data)->ceed)); 1081 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1082 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1083 1084 CeedCall(CeedFree(data)); 1085 return CEED_ERROR_SUCCESS; 1086 } 1087 1088 /** 1089 @brief Get CeedOperatorAssemblyData 1090 1091 @param[in] op CeedOperator to assemble 1092 @param[out] data CeedQFunctionAssemblyData 1093 1094 @return An error code: 0 - success, otherwise - failure 1095 1096 @ref Backend 1097 **/ 1098 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1099 if (!op->op_assembled) { 1100 CeedOperatorAssemblyData data; 1101 1102 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1103 op->op_assembled = data; 1104 } 1105 *data = op->op_assembled; 1106 1107 return CEED_ERROR_SUCCESS; 1108 } 1109 1110 /** 1111 @brief Create object holding CeedOperator assembly data. 1112 1113 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1114 An array with references to the corresponding active CeedElemRestrictions is also stored. 1115 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1116 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1117 CeedEvalMode. 1118 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1119 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1120 1121 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1122 @param[in] op CeedOperator to be assembled 1123 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1124 1125 @return An error code: 0 - success, otherwise - failure 1126 1127 @ref Backend 1128 **/ 1129 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1130 CeedInt num_active_bases = 0; 1131 1132 // Allocate 1133 CeedCall(CeedCalloc(1, data)); 1134 (*data)->ceed = ceed; 1135 CeedCall(CeedReference(ceed)); 1136 1137 // Build OperatorAssembly data 1138 CeedQFunction qf; 1139 CeedQFunctionField *qf_fields; 1140 CeedOperatorField *op_fields; 1141 CeedInt num_input_fields; 1142 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1143 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1144 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1145 1146 // Determine active input basis 1147 CeedInt *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0; 1148 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1149 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1150 1151 for (CeedInt i = 0; i < num_input_fields; i++) { 1152 CeedVector vec; 1153 1154 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1155 if (vec == CEED_VECTOR_ACTIVE) { 1156 CeedInt index = -1, dim = 1, num_components; 1157 CeedBasis basis_in = NULL; 1158 CeedEvalMode eval_mode; 1159 1160 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1161 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1162 CeedCall(CeedBasisGetNumComponents(basis_in, &num_components)); 1163 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1164 for (CeedInt i = 0; i < num_active_bases; i++) { 1165 if ((*data)->active_bases[i] == basis_in) index = i; 1166 } 1167 if (index == -1) { 1168 CeedElemRestriction elem_rstr_in; 1169 1170 index = num_active_bases; 1171 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1172 (*data)->active_bases[num_active_bases] = NULL; 1173 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases])); 1174 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1175 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1176 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1177 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases])); 1178 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1179 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1180 num_eval_modes_in[index] = 0; 1181 num_eval_modes_out[index] = 0; 1182 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1183 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1184 eval_modes_in[index] = NULL; 1185 eval_modes_out[index] = NULL; 1186 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1187 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1188 eval_mode_offsets_in[index] = NULL; 1189 eval_mode_offsets_out[index] = NULL; 1190 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1191 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1192 (*data)->assembled_bases_in[index] = NULL; 1193 (*data)->assembled_bases_out[index] = NULL; 1194 num_active_bases++; 1195 } 1196 switch (eval_mode) { 1197 case CEED_EVAL_NONE: 1198 case CEED_EVAL_INTERP: 1199 CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index])); 1200 CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index])); 1201 eval_modes_in[index][num_eval_modes_in[index]] = eval_mode; 1202 eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset; 1203 offset += num_components; 1204 num_eval_modes_in[index] += 1; 1205 break; 1206 case CEED_EVAL_GRAD: 1207 CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index])); 1208 CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index])); 1209 for (CeedInt d = 0; d < dim; d++) { 1210 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1211 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1212 offset += num_components; 1213 } 1214 num_eval_modes_in[index] += dim; 1215 break; 1216 case CEED_EVAL_WEIGHT: 1217 case CEED_EVAL_DIV: 1218 case CEED_EVAL_CURL: 1219 break; // Caught by QF Assembly 1220 } 1221 } 1222 } 1223 (*data)->num_eval_modes_in = num_eval_modes_in; 1224 (*data)->eval_modes_in = eval_modes_in; 1225 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1226 1227 // Determine active output basis 1228 CeedInt num_output_fields; 1229 1230 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1231 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1232 offset = 0; 1233 for (CeedInt i = 0; i < num_output_fields; i++) { 1234 CeedVector vec; 1235 1236 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1237 if (vec == CEED_VECTOR_ACTIVE) { 1238 CeedInt index = -1, dim = 1, num_components; 1239 CeedBasis basis_out = NULL; 1240 CeedEvalMode eval_mode; 1241 1242 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1243 CeedCall(CeedBasisGetDimension(basis_out, &dim)); 1244 CeedCall(CeedBasisGetNumComponents(basis_out, &num_components)); 1245 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1246 for (CeedInt i = 0; i < num_active_bases; i++) { 1247 if ((*data)->active_bases[i] == basis_out) index = i; 1248 } 1249 if (index == -1) { 1250 CeedElemRestriction elem_rstr_out; 1251 1252 index = num_active_bases; 1253 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases)); 1254 (*data)->active_bases[num_active_bases] = NULL; 1255 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases])); 1256 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs)); 1257 (*data)->active_elem_rstrs[num_active_bases] = NULL; 1258 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1259 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases])); 1260 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in)); 1261 CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out)); 1262 num_eval_modes_in[index] = 0; 1263 num_eval_modes_out[index] = 0; 1264 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in)); 1265 CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out)); 1266 eval_modes_in[index] = NULL; 1267 eval_modes_out[index] = NULL; 1268 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in)); 1269 CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out)); 1270 eval_mode_offsets_in[index] = NULL; 1271 eval_mode_offsets_out[index] = NULL; 1272 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in)); 1273 CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out)); 1274 (*data)->assembled_bases_in[index] = NULL; 1275 (*data)->assembled_bases_out[index] = NULL; 1276 num_active_bases++; 1277 } 1278 switch (eval_mode) { 1279 case CEED_EVAL_NONE: 1280 case CEED_EVAL_INTERP: 1281 CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index])); 1282 CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index])); 1283 eval_modes_out[index][num_eval_modes_out[index]] = eval_mode; 1284 eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset; 1285 offset += num_components; 1286 num_eval_modes_out[index] += 1; 1287 break; 1288 case CEED_EVAL_GRAD: 1289 CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index])); 1290 CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index])); 1291 for (CeedInt d = 0; d < dim; d++) { 1292 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1293 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1294 offset += num_components; 1295 } 1296 num_eval_modes_out[index] += dim; 1297 break; 1298 case CEED_EVAL_WEIGHT: 1299 case CEED_EVAL_DIV: 1300 case CEED_EVAL_CURL: 1301 break; // Caught by QF Assembly 1302 } 1303 } 1304 } 1305 (*data)->num_output_components = offset; 1306 (*data)->num_eval_modes_out = num_eval_modes_out; 1307 (*data)->eval_modes_out = eval_modes_out; 1308 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1309 (*data)->num_active_bases = num_active_bases; 1310 1311 return CEED_ERROR_SUCCESS; 1312 } 1313 1314 /** 1315 @brief Get CeedOperator CeedEvalModes for assembly. 1316 1317 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1318 1319 @param[in] data CeedOperatorAssemblyData 1320 @param[out] num_active_bases Total number of active bases 1321 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1322 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1323 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1324 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1325 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1326 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1327 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1328 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1329 including contributions of all active bases 1330 1331 @return An error code: 0 - success, otherwise - failure 1332 1333 1334 @ref Backend 1335 **/ 1336 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in, 1337 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out, 1338 const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) { 1339 if (num_active_bases) *num_active_bases = data->num_active_bases; 1340 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1341 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1342 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1343 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1344 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1345 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1346 if (num_output_components) *num_output_components = data->num_output_components; 1347 1348 return CEED_ERROR_SUCCESS; 1349 } 1350 1351 /** 1352 @brief Get CeedOperator CeedBasis data for assembly. 1353 1354 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1355 1356 @param[in] data CeedOperatorAssemblyData 1357 @param[out] num_active_bases Number of active bases, or NULL 1358 @param[out] active_bases Pointer to hold active CeedBasis, or NULL 1359 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1360 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1361 1362 @return An error code: 0 - success, otherwise - failure 1363 1364 @ref Backend 1365 **/ 1366 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases, 1367 const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) { 1368 // Assemble B_in, B_out if needed 1369 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1370 CeedInt num_qpts; 1371 1372 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1373 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1374 CeedInt elem_size; 1375 CeedScalar *B_in = NULL, *identity = NULL; 1376 const CeedScalar *interp_in, *grad_in; 1377 bool has_eval_none = false; 1378 1379 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size)); 1380 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in)); 1381 1382 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1383 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1384 } 1385 if (has_eval_none) { 1386 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1387 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1388 identity[i * elem_size + i] = 1.0; 1389 } 1390 } 1391 CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in)); 1392 CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in)); 1393 1394 for (CeedInt q = 0; q < num_qpts; q++) { 1395 for (CeedInt n = 0; n < elem_size; n++) { 1396 CeedInt d_in = -1; 1397 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1398 const CeedInt qq = data->num_eval_modes_in[b] * q; 1399 const CeedScalar *B = NULL; 1400 1401 if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++; 1402 CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B); 1403 B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n]; 1404 } 1405 } 1406 } 1407 if (identity) CeedCall(CeedFree(identity)); 1408 data->assembled_bases_in[b] = B_in; 1409 } 1410 } 1411 1412 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1413 CeedInt num_qpts; 1414 1415 CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts)); 1416 for (CeedInt b = 0; b < data->num_active_bases; b++) { 1417 CeedInt elem_size; 1418 const CeedScalar *interp_out, *grad_out; 1419 bool has_eval_none = false; 1420 CeedScalar *B_out = NULL, *identity = NULL; 1421 1422 CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size)); 1423 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out)); 1424 1425 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1426 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1427 } 1428 if (has_eval_none) { 1429 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1430 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1431 identity[i * elem_size + i] = 1.0; 1432 } 1433 } 1434 CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out)); 1435 CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out)); 1436 1437 for (CeedInt q = 0; q < num_qpts; q++) { 1438 for (CeedInt n = 0; n < elem_size; n++) { 1439 CeedInt d_out = -1; 1440 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1441 const CeedInt qq = data->num_eval_modes_out[b] * q; 1442 const CeedScalar *B = NULL; 1443 1444 if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++; 1445 CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B); 1446 B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n]; 1447 } 1448 } 1449 } 1450 if (identity) CeedCall(CeedFree(identity)); 1451 data->assembled_bases_out[b] = B_out; 1452 } 1453 } 1454 1455 // Pass out assembled data 1456 if (active_bases) *active_bases = data->active_bases; 1457 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1458 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1459 1460 return CEED_ERROR_SUCCESS; 1461 } 1462 1463 /** 1464 @brief Get CeedOperator CeedBasis data for assembly. 1465 1466 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1467 1468 @param[in] data CeedOperatorAssemblyData 1469 @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL 1470 @param[out] active_elem_rstrs Pointer to hold active CeedElemRestrictions, or NULL 1471 1472 @return An error code: 0 - success, otherwise - failure 1473 1474 @ref Backend 1475 **/ 1476 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs, 1477 CeedElemRestriction **active_elem_rstrs) { 1478 if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases; 1479 if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs; 1480 1481 return CEED_ERROR_SUCCESS; 1482 } 1483 1484 /** 1485 @brief Destroy CeedOperatorAssemblyData 1486 1487 @param[in,out] data CeedOperatorAssemblyData to destroy 1488 1489 @return An error code: 0 - success, otherwise - failure 1490 1491 @ref Backend 1492 **/ 1493 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1494 if (!*data) return CEED_ERROR_SUCCESS; 1495 1496 CeedCall(CeedDestroy(&(*data)->ceed)); 1497 for (CeedInt b = 0; b < (*data)->num_active_bases; b++) { 1498 CeedCall(CeedBasisDestroy(&(*data)->active_bases[b])); 1499 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b])); 1500 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1501 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1502 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1503 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1504 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1505 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1506 } 1507 CeedCall(CeedFree(&(*data)->active_bases)); 1508 CeedCall(CeedFree(&(*data)->active_elem_rstrs)); 1509 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1510 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1511 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1512 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1513 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1514 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1515 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1516 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1517 1518 CeedCall(CeedFree(data)); 1519 return CEED_ERROR_SUCCESS; 1520 } 1521 1522 /// @} 1523 1524 /// ---------------------------------------------------------------------------- 1525 /// CeedOperator Public API 1526 /// ---------------------------------------------------------------------------- 1527 /// @addtogroup CeedOperatorUser 1528 /// @{ 1529 1530 /** 1531 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1532 1533 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1534 The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices 1535 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the 1536 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, 1537 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] 1538 and producing the output [dv_0, dv_1, v]. 1539 1540 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1541 1542 @param[in] op CeedOperator to assemble CeedQFunction 1543 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1544 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1545 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1546 1547 @return An error code: 0 - success, otherwise - failure 1548 1549 @ref User 1550 **/ 1551 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1552 CeedCall(CeedOperatorCheckReady(op)); 1553 1554 if (op->LinearAssembleQFunction) { 1555 // Backend version 1556 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1557 } else { 1558 // Operator fallback 1559 CeedOperator op_fallback; 1560 1561 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1562 if (op_fallback) { 1563 CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1564 } else { 1565 // LCOV_EXCL_START 1566 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1567 // LCOV_EXCL_STOP 1568 } 1569 } 1570 return CEED_ERROR_SUCCESS; 1571 } 1572 1573 /** 1574 @brief Assemble CeedQFunction and store result internally. 1575 Return copied references of stored data to the caller. 1576 Caller is responsible for ownership and destruction of the copied references. 1577 See also @ref CeedOperatorLinearAssembleQFunction 1578 1579 @param[in] op CeedOperator to assemble CeedQFunction 1580 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1581 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1582 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1583 1584 @return An error code: 0 - success, otherwise - failure 1585 1586 @ref User 1587 **/ 1588 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1589 CeedCall(CeedOperatorCheckReady(op)); 1590 1591 if (op->LinearAssembleQFunctionUpdate) { 1592 // Backend version 1593 bool qf_assembled_is_setup; 1594 CeedVector assembled_vec = NULL; 1595 CeedElemRestriction assembled_rstr = NULL; 1596 1597 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1598 if (qf_assembled_is_setup) { 1599 bool update_needed; 1600 1601 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1602 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1603 if (update_needed) { 1604 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1605 } 1606 } else { 1607 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1608 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1609 } 1610 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1611 1612 // Copy reference from internally held copy 1613 *assembled = NULL; 1614 *rstr = NULL; 1615 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1616 CeedCall(CeedVectorDestroy(&assembled_vec)); 1617 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1618 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1619 } else { 1620 // Operator fallback 1621 CeedOperator op_fallback; 1622 1623 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1624 if (op_fallback) { 1625 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1626 } else { 1627 // LCOV_EXCL_START 1628 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1629 // LCOV_EXCL_STOP 1630 } 1631 } 1632 1633 return CEED_ERROR_SUCCESS; 1634 } 1635 1636 /** 1637 @brief Assemble the diagonal of a square linear CeedOperator 1638 1639 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1640 1641 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1642 1643 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1644 1645 @param[in] op CeedOperator to assemble CeedQFunction 1646 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1647 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1648 1649 @return An error code: 0 - success, otherwise - failure 1650 1651 @ref User 1652 **/ 1653 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1654 bool is_composite; 1655 CeedCall(CeedOperatorCheckReady(op)); 1656 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1657 1658 CeedSize input_size = 0, output_size = 0; 1659 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1660 if (input_size != output_size) { 1661 // LCOV_EXCL_START 1662 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1663 // LCOV_EXCL_STOP 1664 } 1665 1666 // Early exit for empty operator 1667 if (!is_composite) { 1668 CeedInt num_elem = 0; 1669 1670 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1671 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1672 } 1673 1674 if (op->LinearAssembleDiagonal) { 1675 // Backend version 1676 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1677 return CEED_ERROR_SUCCESS; 1678 } else if (op->LinearAssembleAddDiagonal) { 1679 // Backend version with zeroing first 1680 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1681 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1682 return CEED_ERROR_SUCCESS; 1683 } else { 1684 // Operator fallback 1685 CeedOperator op_fallback; 1686 1687 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1688 if (op_fallback) { 1689 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1690 return CEED_ERROR_SUCCESS; 1691 } 1692 } 1693 // Default interface implementation 1694 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1695 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1696 1697 return CEED_ERROR_SUCCESS; 1698 } 1699 1700 /** 1701 @brief Assemble the diagonal of a square linear CeedOperator 1702 1703 This sums into a CeedVector the diagonal of a linear CeedOperator. 1704 1705 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1706 1707 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1708 1709 @param[in] op CeedOperator to assemble CeedQFunction 1710 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1711 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1712 1713 @return An error code: 0 - success, otherwise - failure 1714 1715 @ref User 1716 **/ 1717 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1718 bool is_composite; 1719 CeedCall(CeedOperatorCheckReady(op)); 1720 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1721 1722 CeedSize input_size = 0, output_size = 0; 1723 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1724 if (input_size != output_size) { 1725 // LCOV_EXCL_START 1726 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1727 // LCOV_EXCL_STOP 1728 } 1729 1730 // Early exit for empty operator 1731 if (!is_composite) { 1732 CeedInt num_elem = 0; 1733 1734 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1735 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1736 } 1737 1738 if (op->LinearAssembleAddDiagonal) { 1739 // Backend version 1740 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1741 return CEED_ERROR_SUCCESS; 1742 } else { 1743 // Operator fallback 1744 CeedOperator op_fallback; 1745 1746 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1747 if (op_fallback) { 1748 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1749 return CEED_ERROR_SUCCESS; 1750 } 1751 } 1752 // Default interface implementation 1753 if (is_composite) { 1754 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1755 } else { 1756 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1757 } 1758 1759 return CEED_ERROR_SUCCESS; 1760 } 1761 1762 /** 1763 @brief Assemble the point block diagonal of a square linear CeedOperator 1764 1765 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1766 1767 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1768 1769 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1770 1771 @param[in] op CeedOperator to assemble CeedQFunction 1772 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1773 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1774 component in]. 1775 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1776 1777 @return An error code: 0 - success, otherwise - failure 1778 1779 @ref User 1780 **/ 1781 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1782 bool is_composite; 1783 CeedCall(CeedOperatorCheckReady(op)); 1784 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1785 1786 CeedSize input_size = 0, output_size = 0; 1787 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1788 if (input_size != output_size) { 1789 // LCOV_EXCL_START 1790 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1791 // LCOV_EXCL_STOP 1792 } 1793 1794 // Early exit for empty operator 1795 if (!is_composite) { 1796 CeedInt num_elem = 0; 1797 1798 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1799 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1800 } 1801 1802 if (op->LinearAssemblePointBlockDiagonal) { 1803 // Backend version 1804 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1805 return CEED_ERROR_SUCCESS; 1806 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1807 // Backend version with zeroing first 1808 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1809 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1810 return CEED_ERROR_SUCCESS; 1811 } else { 1812 // Operator fallback 1813 CeedOperator op_fallback; 1814 1815 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1816 if (op_fallback) { 1817 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1818 return CEED_ERROR_SUCCESS; 1819 } 1820 } 1821 // Default interface implementation 1822 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1823 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1824 1825 return CEED_ERROR_SUCCESS; 1826 } 1827 1828 /** 1829 @brief Assemble the point block diagonal of a square linear CeedOperator 1830 1831 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1832 1833 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1834 1835 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1836 1837 @param[in] op CeedOperator to assemble CeedQFunction 1838 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1839 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1840 component in]. 1841 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1842 1843 @return An error code: 0 - success, otherwise - failure 1844 1845 @ref User 1846 **/ 1847 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1848 bool is_composite; 1849 CeedCall(CeedOperatorCheckReady(op)); 1850 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1851 1852 CeedSize input_size = 0, output_size = 0; 1853 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1854 if (input_size != output_size) { 1855 // LCOV_EXCL_START 1856 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1857 // LCOV_EXCL_STOP 1858 } 1859 1860 // Early exit for empty operator 1861 if (!is_composite) { 1862 CeedInt num_elem = 0; 1863 1864 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1865 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1866 } 1867 1868 if (op->LinearAssembleAddPointBlockDiagonal) { 1869 // Backend version 1870 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1871 return CEED_ERROR_SUCCESS; 1872 } else { 1873 // Operator fallback 1874 CeedOperator op_fallback; 1875 1876 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1877 if (op_fallback) { 1878 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1879 return CEED_ERROR_SUCCESS; 1880 } 1881 } 1882 // Default interface implementation 1883 if (is_composite) { 1884 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1885 } else { 1886 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1887 } 1888 1889 return CEED_ERROR_SUCCESS; 1890 } 1891 1892 /** 1893 @brief Fully assemble the nonzero pattern of a linear operator. 1894 1895 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1896 1897 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1898 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j) 1899 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1900 1901 This will generally be slow unless your operator is low-order. 1902 1903 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1904 1905 @param[in] op CeedOperator to assemble 1906 @param[out] num_entries Number of entries in coordinate nonzero pattern 1907 @param[out] rows Row number for each entry 1908 @param[out] cols Column number for each entry 1909 1910 @ref User 1911 **/ 1912 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1913 CeedInt num_suboperators, single_entries; 1914 CeedOperator *sub_operators; 1915 bool is_composite; 1916 CeedCall(CeedOperatorCheckReady(op)); 1917 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1918 1919 if (op->LinearAssembleSymbolic) { 1920 // Backend version 1921 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1922 return CEED_ERROR_SUCCESS; 1923 } else { 1924 // Operator fallback 1925 CeedOperator op_fallback; 1926 1927 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1928 if (op_fallback) { 1929 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1930 return CEED_ERROR_SUCCESS; 1931 } 1932 } 1933 1934 // Default interface implementation 1935 1936 // count entries and allocate rows, cols arrays 1937 *num_entries = 0; 1938 if (is_composite) { 1939 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1940 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1941 for (CeedInt k = 0; k < num_suboperators; ++k) { 1942 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1943 *num_entries += single_entries; 1944 } 1945 } else { 1946 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1947 *num_entries += single_entries; 1948 } 1949 CeedCall(CeedCalloc(*num_entries, rows)); 1950 CeedCall(CeedCalloc(*num_entries, cols)); 1951 1952 // assemble nonzero locations 1953 CeedInt offset = 0; 1954 if (is_composite) { 1955 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1956 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1957 for (CeedInt k = 0; k < num_suboperators; ++k) { 1958 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1959 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1960 offset += single_entries; 1961 } 1962 } else { 1963 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1964 } 1965 1966 return CEED_ERROR_SUCCESS; 1967 } 1968 1969 /** 1970 @brief Fully assemble the nonzero entries of a linear operator. 1971 1972 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1973 1974 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1975 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added, 1976 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1977 1978 This will generally be slow unless your operator is low-order. 1979 1980 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1981 1982 @param[in] op CeedOperator to assemble 1983 @param[out] values Values to assemble into matrix 1984 1985 @ref User 1986 **/ 1987 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1988 CeedInt num_suboperators, single_entries = 0; 1989 CeedOperator *sub_operators; 1990 bool is_composite; 1991 CeedCall(CeedOperatorCheckReady(op)); 1992 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1993 1994 // Early exit for empty operator 1995 if (!is_composite) { 1996 CeedInt num_elem = 0; 1997 1998 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1999 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2000 } 2001 2002 if (op->LinearAssemble) { 2003 // Backend version 2004 CeedCall(op->LinearAssemble(op, values)); 2005 return CEED_ERROR_SUCCESS; 2006 } else { 2007 // Operator fallback 2008 CeedOperator op_fallback; 2009 2010 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2011 if (op_fallback) { 2012 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2013 return CEED_ERROR_SUCCESS; 2014 } 2015 } 2016 2017 // Default interface implementation 2018 CeedInt offset = 0; 2019 CeedCall(CeedVectorSetValue(values, 0.0)); 2020 if (is_composite) { 2021 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2022 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2023 for (CeedInt k = 0; k < num_suboperators; k++) { 2024 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2025 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2026 offset += single_entries; 2027 } 2028 } else { 2029 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2030 } 2031 2032 return CEED_ERROR_SUCCESS; 2033 } 2034 2035 /** 2036 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2037 2038 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2039 2040 @param[in] op Composite CeedOperator 2041 @param[in] num_skip_indices Number of suboperators to skip 2042 @param[in] skip_indices Array of indices of suboperators to skip 2043 @param[out] mult Vector to store multiplicity (of size l_size) 2044 2045 @return An error code: 0 - success, otherwise - failure 2046 2047 @ref User 2048 **/ 2049 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2050 CeedCall(CeedOperatorCheckReady(op)); 2051 2052 Ceed ceed; 2053 CeedInt num_suboperators; 2054 CeedSize l_vec_len; 2055 CeedScalar *mult_array; 2056 CeedVector ones_l_vec; 2057 CeedElemRestriction elem_rstr; 2058 CeedOperator *sub_operators; 2059 2060 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2061 2062 // Zero mult vector 2063 CeedCall(CeedVectorSetValue(mult, 0.0)); 2064 2065 // Get suboperators 2066 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2067 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2068 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2069 2070 // Work vector 2071 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2072 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2073 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2074 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2075 2076 // Compute multiplicity across suboperators 2077 for (CeedInt i = 0; i < num_suboperators; i++) { 2078 const CeedScalar *sub_mult_array; 2079 CeedVector sub_mult_l_vec, ones_e_vec; 2080 2081 // -- Check for suboperator to skip 2082 for (CeedInt j = 0; j < num_skip_indices; j++) { 2083 if (skip_indices[j] == i) continue; 2084 } 2085 2086 // -- Sub operator multiplicity 2087 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2088 CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2089 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2090 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2091 CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2092 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2093 // ---- Flag every node present in the current suboperator 2094 for (CeedInt j = 0; j < l_vec_len; j++) { 2095 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2096 } 2097 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2098 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2099 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2100 } 2101 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2102 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2103 2104 return CEED_ERROR_SUCCESS; 2105 } 2106 2107 /** 2108 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2109 grid interpolation 2110 2111 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2112 2113 @param[in] op_fine Fine grid operator 2114 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2115 @param[in] rstr_coarse Coarse grid restriction 2116 @param[in] basis_coarse Coarse grid active vector basis 2117 @param[out] op_coarse Coarse grid operator 2118 @param[out] op_prolong Coarse to fine operator, or NULL 2119 @param[out] op_restrict Fine to coarse operator, or NULL 2120 2121 @return An error code: 0 - success, otherwise - failure 2122 2123 @ref User 2124 **/ 2125 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2126 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2127 CeedCall(CeedOperatorCheckReady(op_fine)); 2128 2129 // Build prolongation matrix, if required 2130 CeedBasis basis_c_to_f = NULL; 2131 if (op_prolong || op_restrict) { 2132 CeedBasis basis_fine; 2133 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2134 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2135 } 2136 2137 // Core code 2138 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2139 2140 return CEED_ERROR_SUCCESS; 2141 } 2142 2143 /** 2144 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2145 2146 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2147 2148 @param[in] op_fine Fine grid operator 2149 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2150 @param[in] rstr_coarse Coarse grid restriction 2151 @param[in] basis_coarse Coarse grid active vector basis 2152 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2153 @param[out] op_coarse Coarse grid operator 2154 @param[out] op_prolong Coarse to fine operator, or NULL 2155 @param[out] op_restrict Fine to coarse operator, or NULL 2156 2157 @return An error code: 0 - success, otherwise - failure 2158 2159 @ref User 2160 **/ 2161 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2162 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2163 CeedOperator *op_restrict) { 2164 CeedCall(CeedOperatorCheckReady(op_fine)); 2165 Ceed ceed; 2166 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2167 2168 // Check for compatible quadrature spaces 2169 CeedBasis basis_fine; 2170 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2171 CeedInt Q_f, Q_c; 2172 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2173 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2174 if (Q_f != Q_c) { 2175 // LCOV_EXCL_START 2176 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2177 // LCOV_EXCL_STOP 2178 } 2179 2180 // Create coarse to fine basis, if required 2181 CeedBasis basis_c_to_f = NULL; 2182 if (op_prolong || op_restrict) { 2183 // Check if interpolation matrix is provided 2184 if (!interp_c_to_f) { 2185 // LCOV_EXCL_START 2186 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2187 // LCOV_EXCL_STOP 2188 } 2189 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2190 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2191 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2192 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2193 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2194 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2195 CeedScalar *q_ref, *q_weight, *grad; 2196 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2197 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2198 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2199 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2200 CeedCall(CeedFree(&q_ref)); 2201 CeedCall(CeedFree(&q_weight)); 2202 CeedCall(CeedFree(&grad)); 2203 } 2204 2205 // Core code 2206 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2207 return CEED_ERROR_SUCCESS; 2208 } 2209 2210 /** 2211 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2212 2213 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2214 2215 @param[in] op_fine Fine grid operator 2216 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2217 @param[in] rstr_coarse Coarse grid restriction 2218 @param[in] basis_coarse Coarse grid active vector basis 2219 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2220 @param[out] op_coarse Coarse grid operator 2221 @param[out] op_prolong Coarse to fine operator, or NULL 2222 @param[out] op_restrict Fine to coarse operator, or NULL 2223 2224 @return An error code: 0 - success, otherwise - failure 2225 2226 @ref User 2227 **/ 2228 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2229 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2230 CeedOperator *op_restrict) { 2231 CeedCall(CeedOperatorCheckReady(op_fine)); 2232 Ceed ceed; 2233 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2234 2235 // Check for compatible quadrature spaces 2236 CeedBasis basis_fine; 2237 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2238 CeedInt Q_f, Q_c; 2239 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2240 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2241 if (Q_f != Q_c) { 2242 // LCOV_EXCL_START 2243 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2244 // LCOV_EXCL_STOP 2245 } 2246 2247 // Coarse to fine basis 2248 CeedBasis basis_c_to_f = NULL; 2249 if (op_prolong || op_restrict) { 2250 // Check if interpolation matrix is provided 2251 if (!interp_c_to_f) { 2252 // LCOV_EXCL_START 2253 return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2254 // LCOV_EXCL_STOP 2255 } 2256 CeedElemTopology topo; 2257 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2258 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2259 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2260 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2261 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2262 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2263 CeedScalar *q_ref, *q_weight, *grad; 2264 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2265 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2266 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2267 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2268 CeedCall(CeedFree(&q_ref)); 2269 CeedCall(CeedFree(&q_weight)); 2270 CeedCall(CeedFree(&grad)); 2271 } 2272 2273 // Core code 2274 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2275 return CEED_ERROR_SUCCESS; 2276 } 2277 2278 /** 2279 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2280 2281 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2282 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V. 2283 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T 2284 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear. 2285 2286 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2287 2288 @param[in] op CeedOperator to create element inverses 2289 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2290 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2291 2292 @return An error code: 0 - success, otherwise - failure 2293 2294 @ref User 2295 **/ 2296 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2297 CeedCall(CeedOperatorCheckReady(op)); 2298 2299 if (op->CreateFDMElementInverse) { 2300 // Backend version 2301 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2302 return CEED_ERROR_SUCCESS; 2303 } else { 2304 // Operator fallback 2305 CeedOperator op_fallback; 2306 2307 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2308 if (op_fallback) { 2309 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2310 return CEED_ERROR_SUCCESS; 2311 } 2312 } 2313 2314 // Default interface implementation 2315 Ceed ceed, ceed_parent; 2316 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2317 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2318 ceed_parent = ceed_parent ? ceed_parent : ceed; 2319 CeedQFunction qf; 2320 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2321 2322 // Determine active input basis 2323 bool interp = false, grad = false; 2324 CeedBasis basis = NULL; 2325 CeedElemRestriction rstr = NULL; 2326 CeedOperatorField *op_fields; 2327 CeedQFunctionField *qf_fields; 2328 CeedInt num_input_fields; 2329 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2330 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2331 for (CeedInt i = 0; i < num_input_fields; i++) { 2332 CeedVector vec; 2333 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2334 if (vec == CEED_VECTOR_ACTIVE) { 2335 CeedEvalMode eval_mode; 2336 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2337 interp = interp || eval_mode == CEED_EVAL_INTERP; 2338 grad = grad || eval_mode == CEED_EVAL_GRAD; 2339 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2340 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2341 } 2342 } 2343 if (!basis) { 2344 // LCOV_EXCL_START 2345 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2346 // LCOV_EXCL_STOP 2347 } 2348 CeedSize l_size = 1; 2349 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2350 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2351 CeedCall(CeedBasisGetNumNodes(basis, &elem_size)); 2352 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2353 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2354 CeedCall(CeedBasisGetDimension(basis, &dim)); 2355 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2356 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2357 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2358 2359 // Build and diagonalize 1D Mass and Laplacian 2360 bool tensor_basis; 2361 CeedCall(CeedBasisIsTensor(basis, &tensor_basis)); 2362 if (!tensor_basis) { 2363 // LCOV_EXCL_START 2364 return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2365 // LCOV_EXCL_STOP 2366 } 2367 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2368 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2369 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2370 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2371 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2372 CeedCall(CeedCalloc(P_1d, &lambda)); 2373 // -- Build matrices 2374 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2375 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2376 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2377 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2378 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2379 2380 // -- Diagonalize 2381 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2382 CeedCall(CeedFree(&mass)); 2383 CeedCall(CeedFree(&laplace)); 2384 for (CeedInt i = 0; i < P_1d; i++) { 2385 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2386 } 2387 CeedCall(CeedFree(&x)); 2388 2389 // Assemble QFunction 2390 CeedVector assembled; 2391 CeedElemRestriction rstr_qf; 2392 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2393 CeedInt layout[3]; 2394 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2395 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2396 CeedScalar max_norm = 0; 2397 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2398 2399 // Calculate element averages 2400 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2401 CeedScalar *elem_avg; 2402 const CeedScalar *assembled_array, *q_weight_array; 2403 CeedVector q_weight; 2404 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2405 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2406 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2407 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2408 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2409 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2410 for (CeedInt e = 0; e < num_elem; e++) { 2411 CeedInt count = 0; 2412 for (CeedInt q = 0; q < num_qpts; q++) { 2413 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2414 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2415 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2416 count++; 2417 } 2418 } 2419 } 2420 if (count) { 2421 elem_avg[e] /= count; 2422 } else { 2423 elem_avg[e] = 1.0; 2424 } 2425 } 2426 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2427 CeedCall(CeedVectorDestroy(&assembled)); 2428 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2429 CeedCall(CeedVectorDestroy(&q_weight)); 2430 2431 // Build FDM diagonal 2432 CeedVector q_data; 2433 CeedScalar *q_data_array, *fdm_diagonal; 2434 CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal)); 2435 const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON; 2436 for (CeedInt c = 0; c < num_comp; c++) { 2437 for (CeedInt n = 0; n < elem_size; n++) { 2438 if (interp) fdm_diagonal[c * elem_size + n] = 1.0; 2439 if (grad) { 2440 for (CeedInt d = 0; d < dim; d++) { 2441 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2442 fdm_diagonal[c * elem_size + n] += lambda[i]; 2443 } 2444 } 2445 if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound; 2446 } 2447 } 2448 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data)); 2449 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2450 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2451 for (CeedInt e = 0; e < num_elem; e++) { 2452 for (CeedInt c = 0; c < num_comp; c++) { 2453 for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]); 2454 } 2455 } 2456 CeedCall(CeedFree(&elem_avg)); 2457 CeedCall(CeedFree(&fdm_diagonal)); 2458 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2459 2460 // Setup FDM operator 2461 // -- Basis 2462 CeedBasis fdm_basis; 2463 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2464 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2465 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2466 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2467 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2468 CeedCall(CeedFree(&fdm_interp)); 2469 CeedCall(CeedFree(&grad_dummy)); 2470 CeedCall(CeedFree(&q_ref_dummy)); 2471 CeedCall(CeedFree(&q_weight_dummy)); 2472 CeedCall(CeedFree(&lambda)); 2473 2474 // -- Restriction 2475 CeedElemRestriction rstr_qd_i; 2476 CeedInt strides[3] = {1, elem_size, elem_size * num_comp}; 2477 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i)); 2478 // -- QFunction 2479 CeedQFunction qf_fdm; 2480 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2481 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2482 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2483 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2484 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2485 // -- QFunction context 2486 CeedInt *num_comp_data; 2487 CeedCall(CeedCalloc(1, &num_comp_data)); 2488 num_comp_data[0] = num_comp; 2489 CeedQFunctionContext ctx_fdm; 2490 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2491 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2492 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2493 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2494 // -- Operator 2495 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2496 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2497 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2498 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2499 2500 // Cleanup 2501 CeedCall(CeedVectorDestroy(&q_data)); 2502 CeedCall(CeedBasisDestroy(&fdm_basis)); 2503 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2504 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2505 2506 return CEED_ERROR_SUCCESS; 2507 } 2508 2509 /// @} 2510