1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <assert.h> 9 #include <ceed-impl.h> 10 #include <ceed/backend.h> 11 #include <ceed/ceed.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 // Check if NULL qf passed in 39 if (!qf) return CEED_ERROR_SUCCESS; 40 41 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 42 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 47 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 48 memcpy(source_path_with_name, qf->source_path, path_len); 49 memcpy(&source_path_with_name[path_len], ":", 1); 50 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 51 } else { 52 CeedCall(CeedCalloc(1, &source_path_with_name)); 53 } 54 55 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 56 { 57 CeedQFunctionContext ctx; 58 59 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 60 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 61 } 62 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 63 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 64 } 65 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 66 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 67 } 68 CeedCall(CeedFree(&source_path_with_name)); 69 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 Ceed ceed_fallback; 84 85 // Check not already created 86 if (op->op_fallback) return CEED_ERROR_SUCCESS; 87 88 // Fallback Ceed 89 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 90 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 91 92 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 93 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 94 95 // Clone Op 96 CeedOperator op_fallback; 97 if (op->is_composite) { 98 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 99 for (CeedInt i = 0; i < op->num_suboperators; i++) { 100 CeedOperator op_sub_fallback; 101 102 CeedCall(CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback)); 103 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 104 } 105 } else { 106 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 107 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 108 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 109 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 110 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 111 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 112 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis, 113 op->input_fields[i]->vec)); 114 } 115 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 116 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis, 117 op->output_fields[i]->vec)); 118 } 119 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 120 if (op_fallback->num_qpts == 0) { 121 CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts)); 122 } 123 // Cleanup 124 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 125 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 126 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 127 } 128 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 129 CeedCall(CeedOperatorCheckReady(op_fallback)); 130 op->op_fallback = op_fallback; 131 132 return CEED_ERROR_SUCCESS; 133 } 134 135 /** 136 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 137 138 @param[in] op CeedOperator to retrieve fallback for 139 @param[out] op_fallback Fallback CeedOperator 140 141 @return An error code: 0 - success, otherwise - failure 142 143 @ref Developer 144 **/ 145 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 146 // Create if needed 147 if (!op->op_fallback) { 148 CeedCall(CeedOperatorCreateFallback(op)); 149 } 150 if (op->op_fallback) { 151 bool is_debug; 152 153 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 154 if (is_debug) { 155 Ceed ceed_fallback; 156 const char *resource, *resource_fallback; 157 158 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 159 CeedCall(CeedGetResource(op->ceed, &resource)); 160 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 161 162 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 163 CeedDebug(op->ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 164 op->op_fallback); 165 } 166 } 167 *op_fallback = op->op_fallback; 168 169 return CEED_ERROR_SUCCESS; 170 } 171 172 /** 173 @brief Select correct basis matrix pointer based on CeedEvalMode 174 175 @param[in] eval_mode Current basis evaluation mode 176 @param[in] identity Pointer to identity matrix 177 @param[in] interp Pointer to interpolation matrix 178 @param[in] grad Pointer to gradient matrix 179 @param[out] basis_ptr Basis pointer to set 180 181 @ref Developer 182 **/ 183 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad, 184 const CeedScalar **basis_ptr) { 185 switch (eval_mode) { 186 case CEED_EVAL_NONE: 187 *basis_ptr = identity; 188 break; 189 case CEED_EVAL_INTERP: 190 *basis_ptr = interp; 191 break; 192 case CEED_EVAL_GRAD: 193 *basis_ptr = grad; 194 break; 195 case CEED_EVAL_WEIGHT: 196 case CEED_EVAL_DIV: 197 case CEED_EVAL_CURL: 198 break; // Caught by QF Assembly 199 } 200 assert(*basis_ptr != NULL); 201 } 202 203 /** 204 @brief Create point block restriction for active operator field 205 206 @param[in] rstr Original CeedElemRestriction for active field 207 @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored 208 209 @return An error code: 0 - success, otherwise - failure 210 211 @ref Developer 212 **/ 213 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) { 214 Ceed ceed; 215 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 216 const CeedInt *offsets; 217 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 218 219 // Expand offsets 220 CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; 221 CeedSize l_size; 222 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 223 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 224 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 225 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 226 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 227 CeedInt shift = num_comp; 228 if (comp_stride != 1) shift *= num_comp; 229 CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets)); 230 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 231 pointblock_offsets[i] = offsets[i] * shift; 232 } 233 234 // Create new restriction 235 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 236 pointblock_offsets, pointblock_rstr)); 237 238 // Cleanup 239 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 240 241 return CEED_ERROR_SUCCESS; 242 } 243 244 /** 245 @brief Core logic for assembling operator diagonal or point block diagonal 246 247 @param[in] op CeedOperator to assemble point block diagonal 248 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 249 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 250 @param[out] assembled CeedVector to store assembled diagonal 251 252 @return An error code: 0 - success, otherwise - failure 253 254 @ref Developer 255 **/ 256 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 257 Ceed ceed; 258 CeedCall(CeedOperatorGetCeed(op, &ceed)); 259 260 // Assemble QFunction 261 CeedQFunction qf; 262 CeedCall(CeedOperatorGetQFunction(op, &qf)); 263 CeedInt num_input_fields, num_output_fields; 264 CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); 265 CeedVector assembled_qf; 266 CeedElemRestriction rstr; 267 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request)); 268 CeedInt layout[3]; 269 CeedCall(CeedElemRestrictionGetELayout(rstr, &layout)); 270 CeedCall(CeedElemRestrictionDestroy(&rstr)); 271 272 // Get assembly data 273 CeedOperatorAssemblyData data; 274 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 275 const CeedEvalMode *eval_mode_in, *eval_mode_out; 276 CeedInt num_eval_mode_in, num_eval_mode_out; 277 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out)); 278 CeedBasis basis_in, basis_out; 279 CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL)); 280 CeedInt num_comp; 281 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 282 283 // Assemble point block diagonal restriction, if needed 284 CeedElemRestriction diag_rstr; 285 CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr)); 286 if (is_pointblock) { 287 CeedElemRestriction point_block_rstr; 288 CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr)); 289 diag_rstr = point_block_rstr; 290 } 291 292 // Create diagonal vector 293 CeedVector elem_diag; 294 CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag)); 295 296 // Assemble element operator diagonals 297 CeedScalar *elem_diag_array; 298 const CeedScalar *assembled_qf_array; 299 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 300 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 301 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 302 CeedInt num_elem, num_nodes, num_qpts; 303 CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem)); 304 CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes)); 305 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 306 307 // Basis matrices 308 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 309 CeedScalar *identity = NULL; 310 bool has_eval_none = false; 311 for (CeedInt i = 0; i < num_eval_mode_in; i++) { 312 has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE); 313 } 314 for (CeedInt i = 0; i < num_eval_mode_out; i++) { 315 has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE); 316 } 317 if (has_eval_none) { 318 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 319 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 320 } 321 CeedCall(CeedBasisGetInterp(basis_in, &interp_in)); 322 CeedCall(CeedBasisGetInterp(basis_out, &interp_out)); 323 CeedCall(CeedBasisGetGrad(basis_in, &grad_in)); 324 CeedCall(CeedBasisGetGrad(basis_out, &grad_out)); 325 // Compute the diagonal of B^T D B 326 // Each element 327 for (CeedInt e = 0; e < num_elem; e++) { 328 CeedInt d_out = -1; 329 // Each basis eval mode pair 330 for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) { 331 const CeedScalar *bt = NULL; 332 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1; 333 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt); 334 CeedInt d_in = -1; 335 for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) { 336 const CeedScalar *b = NULL; 337 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1; 338 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b); 339 // Each component 340 for (CeedInt c_out = 0; c_out < num_comp; c_out++) { 341 // Each qpoint/node pair 342 for (CeedInt q = 0; q < num_qpts; q++) { 343 if (is_pointblock) { 344 // Point Block Diagonal 345 for (CeedInt c_in = 0; c_in < num_comp; c_in++) { 346 const CeedScalar qf_value = 347 assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] + 348 e * layout[2]]; 349 for (CeedInt n = 0; n < num_nodes; n++) { 350 elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] += 351 bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n]; 352 } 353 } 354 } else { 355 // Diagonal Only 356 const CeedScalar qf_value = 357 assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] + 358 e * layout[2]]; 359 for (CeedInt n = 0; n < num_nodes; n++) { 360 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n]; 361 } 362 } 363 } 364 } 365 } 366 } 367 } 368 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 369 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 370 371 // Assemble local operator diagonal 372 CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 373 374 // Cleanup 375 if (is_pointblock) { 376 CeedCall(CeedElemRestrictionDestroy(&diag_rstr)); 377 } 378 CeedCall(CeedVectorDestroy(&assembled_qf)); 379 CeedCall(CeedVectorDestroy(&elem_diag)); 380 CeedCall(CeedFree(&identity)); 381 382 return CEED_ERROR_SUCCESS; 383 } 384 385 /** 386 @brief Core logic for assembling composite operator diagonal 387 388 @param[in] op CeedOperator to assemble point block diagonal 389 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 390 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 391 @param[out] assembled CeedVector to store assembled diagonal 392 393 @return An error code: 0 - success, otherwise - failure 394 395 @ref Developer 396 **/ 397 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock, 398 CeedVector assembled) { 399 CeedInt num_sub; 400 CeedOperator *suboperators; 401 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 402 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 403 for (CeedInt i = 0; i < num_sub; i++) { 404 if (is_pointblock) { 405 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 406 } else { 407 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 408 } 409 } 410 return CEED_ERROR_SUCCESS; 411 } 412 413 /** 414 @brief Build nonzero pattern for non-composite operator 415 416 Users should generally use CeedOperatorLinearAssembleSymbolic() 417 418 @param[in] op CeedOperator to assemble nonzero pattern 419 @param[in] offset Offset for number of entries 420 @param[out] rows Row number for each entry 421 @param[out] cols Column number for each entry 422 423 @return An error code: 0 - success, otherwise - failure 424 425 @ref Developer 426 **/ 427 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 428 Ceed ceed = op->ceed; 429 if (op->is_composite) { 430 // LCOV_EXCL_START 431 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 432 // LCOV_EXCL_STOP 433 } 434 435 CeedSize num_nodes; 436 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL)); 437 CeedElemRestriction rstr_in; 438 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in)); 439 CeedInt num_elem, elem_size, num_comp; 440 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); 441 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); 442 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); 443 CeedInt layout_er[3]; 444 CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er)); 445 446 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 447 448 // Determine elem_dof relation 449 CeedVector index_vec; 450 CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec)); 451 CeedScalar *array; 452 CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array)); 453 for (CeedInt i = 0; i < num_nodes; i++) array[i] = i; 454 CeedCall(CeedVectorRestoreArray(index_vec, &array)); 455 CeedVector elem_dof; 456 CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof)); 457 CeedCall(CeedVectorSetValue(elem_dof, 0.0)); 458 CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE)); 459 const CeedScalar *elem_dof_a; 460 CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a)); 461 CeedCall(CeedVectorDestroy(&index_vec)); 462 463 // Determine i, j locations for element matrices 464 CeedInt count = 0; 465 for (CeedInt e = 0; e < num_elem; e++) { 466 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 467 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 468 for (CeedInt i = 0; i < elem_size; i++) { 469 for (CeedInt j = 0; j < elem_size; j++) { 470 const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2]; 471 const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2]; 472 473 const CeedInt row = elem_dof_a[elem_dof_index_row]; 474 const CeedInt col = elem_dof_a[elem_dof_index_col]; 475 476 rows[offset + count] = row; 477 cols[offset + count] = col; 478 count++; 479 } 480 } 481 } 482 } 483 } 484 if (count != local_num_entries) { 485 // LCOV_EXCL_START 486 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 487 // LCOV_EXCL_STOP 488 } 489 CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a)); 490 CeedCall(CeedVectorDestroy(&elem_dof)); 491 492 return CEED_ERROR_SUCCESS; 493 } 494 495 /** 496 @brief Assemble nonzero entries for non-composite operator 497 498 Users should generally use CeedOperatorLinearAssemble() 499 500 @param[in] op CeedOperator to assemble 501 @param[in] offset Offset for number of entries 502 @param[out] values Values to assemble into matrix 503 504 @return An error code: 0 - success, otherwise - failure 505 506 @ref Developer 507 **/ 508 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 509 Ceed ceed = op->ceed; 510 if (op->is_composite) { 511 // LCOV_EXCL_START 512 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 513 // LCOV_EXCL_STOP 514 } 515 if (op->num_elem == 0) return CEED_ERROR_SUCCESS; 516 517 if (op->LinearAssembleSingle) { 518 // Backend version 519 CeedCall(op->LinearAssembleSingle(op, offset, values)); 520 return CEED_ERROR_SUCCESS; 521 } else { 522 // Operator fallback 523 CeedOperator op_fallback; 524 525 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 526 if (op_fallback) { 527 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 528 return CEED_ERROR_SUCCESS; 529 } 530 } 531 532 // Assemble QFunction 533 CeedQFunction qf; 534 CeedCall(CeedOperatorGetQFunction(op, &qf)); 535 CeedVector assembled_qf; 536 CeedElemRestriction rstr_q; 537 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE)); 538 CeedSize qf_length; 539 CeedCall(CeedVectorGetLength(assembled_qf, &qf_length)); 540 541 CeedInt num_input_fields, num_output_fields; 542 CeedOperatorField *input_fields; 543 CeedOperatorField *output_fields; 544 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 545 546 // Get assembly data 547 CeedOperatorAssemblyData data; 548 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 549 const CeedEvalMode *eval_mode_in, *eval_mode_out; 550 CeedInt num_eval_mode_in, num_eval_mode_out; 551 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out)); 552 CeedBasis basis_in, basis_out; 553 CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL)); 554 555 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) { 556 // LCOV_EXCL_START 557 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs"); 558 // LCOV_EXCL_STOP 559 } 560 561 CeedElemRestriction active_rstr; 562 CeedInt num_elem, elem_size, num_qpts, num_comp; 563 CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr)); 564 CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem)); 565 CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size)); 566 CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp)); 567 CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); 568 569 CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 570 571 // loop over elements and put in data structure 572 const CeedScalar *interp_in, *grad_in; 573 CeedCall(CeedBasisGetInterp(basis_in, &interp_in)); 574 CeedCall(CeedBasisGetGrad(basis_in, &grad_in)); 575 576 const CeedScalar *assembled_qf_array; 577 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 578 579 CeedInt layout_qf[3]; 580 CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf)); 581 CeedCall(CeedElemRestrictionDestroy(&rstr_q)); 582 583 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 584 const CeedScalar *B_mat_in, *B_mat_out; 585 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out)); 586 CeedScalar BTD_mat[elem_size * num_qpts * num_eval_mode_in]; 587 CeedScalar elem_mat[elem_size * elem_size]; 588 CeedInt count = 0; 589 CeedScalar *vals; 590 CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals)); 591 for (CeedInt e = 0; e < num_elem; e++) { 592 for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { 593 for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { 594 // Compute B^T*D 595 for (CeedInt n = 0; n < elem_size; n++) { 596 for (CeedInt q = 0; q < num_qpts; q++) { 597 for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) { 598 const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in); 599 CeedScalar sum = 0.0; 600 for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) { 601 const CeedInt b_out_index = (num_eval_mode_out * q + e_out) * elem_size + n; 602 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out; 603 const CeedInt qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 604 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 605 } 606 BTD_mat[btd_index] = sum; 607 } 608 } 609 } 610 // form element matrix itself (for each block component) 611 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in)); 612 613 // put element matrix in coordinate data structure 614 for (CeedInt i = 0; i < elem_size; i++) { 615 for (CeedInt j = 0; j < elem_size; j++) { 616 vals[offset + count] = elem_mat[i * elem_size + j]; 617 count++; 618 } 619 } 620 } 621 } 622 } 623 if (count != local_num_entries) { 624 // LCOV_EXCL_START 625 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 626 // LCOV_EXCL_STOP 627 } 628 CeedCall(CeedVectorRestoreArray(values, &vals)); 629 630 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 631 CeedCall(CeedVectorDestroy(&assembled_qf)); 632 633 return CEED_ERROR_SUCCESS; 634 } 635 636 /** 637 @brief Count number of entries for assembled CeedOperator 638 639 @param[in] op CeedOperator to assemble 640 @param[out] num_entries Number of entries in assembled representation 641 642 @return An error code: 0 - success, otherwise - failure 643 644 @ref Utility 645 **/ 646 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) { 647 CeedElemRestriction rstr; 648 CeedInt num_elem, elem_size, num_comp; 649 650 if (op->is_composite) { 651 // LCOV_EXCL_START 652 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 653 // LCOV_EXCL_STOP 654 } 655 CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr)); 656 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 657 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 658 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 659 *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem; 660 661 return CEED_ERROR_SUCCESS; 662 } 663 664 /** 665 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 666 667 @param[in] op_fine Fine grid operator 668 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 669 @param[in] rstr_coarse Coarse grid restriction 670 @param[in] basis_coarse Coarse grid active vector basis 671 @param[in] basis_c_to_f Basis for coarse to fine interpolation 672 @param[out] op_coarse Coarse grid operator 673 @param[out] op_prolong Coarse to fine operator 674 @param[out] op_restrict Fine to coarse operator 675 676 @return An error code: 0 - success, otherwise - failure 677 678 @ref Developer 679 **/ 680 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 681 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 682 Ceed ceed; 683 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 684 685 // Check for composite operator 686 bool is_composite; 687 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 688 if (is_composite) { 689 // LCOV_EXCL_START 690 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 691 // LCOV_EXCL_STOP 692 } 693 694 // Coarse Grid 695 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 696 CeedElemRestriction rstr_fine = NULL; 697 // -- Clone input fields 698 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 699 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 700 rstr_fine = op_fine->input_fields[i]->elem_restr; 701 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 702 } else { 703 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr, 704 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 705 } 706 } 707 // -- Clone output fields 708 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 709 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 710 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 711 } else { 712 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr, 713 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 714 } 715 } 716 // -- Clone QFunctionAssemblyData 717 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 718 719 // Multiplicity vector 720 CeedVector mult_vec, mult_e_vec; 721 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 722 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 723 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 724 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 725 CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 726 CeedCall(CeedVectorDestroy(&mult_e_vec)); 727 CeedCall(CeedVectorReciprocal(mult_vec)); 728 729 // Restriction 730 CeedInt num_comp; 731 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 732 CeedQFunction qf_restrict; 733 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 734 CeedInt *num_comp_r_data; 735 CeedCall(CeedCalloc(1, &num_comp_r_data)); 736 num_comp_r_data[0] = num_comp; 737 CeedQFunctionContext ctx_r; 738 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 739 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 740 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 741 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 742 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 743 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 744 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 745 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 746 747 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 748 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 749 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 750 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 751 752 // Prolongation 753 CeedQFunction qf_prolong; 754 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 755 CeedInt *num_comp_p_data; 756 CeedCall(CeedCalloc(1, &num_comp_p_data)); 757 num_comp_p_data[0] = num_comp; 758 CeedQFunctionContext ctx_p; 759 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 760 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 761 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 762 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 763 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 764 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 765 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 766 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 767 768 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 769 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 770 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec)); 771 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE)); 772 773 // Clone name 774 bool has_name = op_fine->name; 775 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 776 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 777 { 778 char *prolongation_name; 779 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 780 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 781 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 782 CeedCall(CeedFree(&prolongation_name)); 783 } 784 { 785 char *restriction_name; 786 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 787 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 788 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 789 CeedCall(CeedFree(&restriction_name)); 790 } 791 792 // Check 793 CeedCall(CeedOperatorCheckReady(*op_coarse)); 794 CeedCall(CeedOperatorCheckReady(*op_prolong)); 795 CeedCall(CeedOperatorCheckReady(*op_restrict)); 796 797 // Cleanup 798 CeedCall(CeedVectorDestroy(&mult_vec)); 799 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 800 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 801 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 802 803 return CEED_ERROR_SUCCESS; 804 } 805 806 /** 807 @brief Build 1D mass matrix and Laplacian with perturbation 808 809 @param[in] interp_1d Interpolation matrix in one dimension 810 @param[in] grad_1d Gradient matrix in one dimension 811 @param[in] q_weight_1d Quadrature weights in one dimension 812 @param[in] P_1d Number of basis nodes in one dimension 813 @param[in] Q_1d Number of quadrature points in one dimension 814 @param[in] dim Dimension of basis 815 @param[out] mass Assembled mass matrix in one dimension 816 @param[out] laplace Assembled perturbed Laplacian in one dimension 817 818 @return An error code: 0 - success, otherwise - failure 819 820 @ref Developer 821 **/ 822 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, 823 CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 824 for (CeedInt i = 0; i < P_1d; i++) { 825 for (CeedInt j = 0; j < P_1d; j++) { 826 CeedScalar sum = 0.0; 827 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 828 mass[i + j * P_1d] = sum; 829 } 830 } 831 // -- Laplacian 832 for (CeedInt i = 0; i < P_1d; i++) { 833 for (CeedInt j = 0; j < P_1d; j++) { 834 CeedScalar sum = 0.0; 835 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 836 laplace[i + j * P_1d] = sum; 837 } 838 } 839 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 840 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 841 return CEED_ERROR_SUCCESS; 842 } 843 CeedPragmaOptimizeOn; 844 845 /// @} 846 847 /// ---------------------------------------------------------------------------- 848 /// CeedOperator Backend API 849 /// ---------------------------------------------------------------------------- 850 /// @addtogroup CeedOperatorBackend 851 /// @{ 852 853 /** 854 @brief Create object holding CeedQFunction assembly data for CeedOperator 855 856 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 857 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 858 859 @return An error code: 0 - success, otherwise - failure 860 861 @ref Backend 862 **/ 863 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 864 CeedCall(CeedCalloc(1, data)); 865 (*data)->ref_count = 1; 866 (*data)->ceed = ceed; 867 CeedCall(CeedReference(ceed)); 868 869 return CEED_ERROR_SUCCESS; 870 } 871 872 /** 873 @brief Increment the reference counter for a CeedQFunctionAssemblyData 874 875 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 876 877 @return An error code: 0 - success, otherwise - failure 878 879 @ref Backend 880 **/ 881 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 882 data->ref_count++; 883 return CEED_ERROR_SUCCESS; 884 } 885 886 /** 887 @brief Set re-use of CeedQFunctionAssemblyData 888 889 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 890 @param[in] reuse_data Boolean flag indicating data re-use 891 892 @return An error code: 0 - success, otherwise - failure 893 894 @ref Backend 895 **/ 896 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 897 data->reuse_data = reuse_data; 898 data->needs_data_update = true; 899 return CEED_ERROR_SUCCESS; 900 } 901 902 /** 903 @brief Mark QFunctionAssemblyData as stale 904 905 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 906 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 907 908 @return An error code: 0 - success, otherwise - failure 909 910 @ref Backend 911 **/ 912 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 913 data->needs_data_update = needs_data_update; 914 return CEED_ERROR_SUCCESS; 915 } 916 917 /** 918 @brief Determine if QFunctionAssemblyData needs update 919 920 @param[in] data CeedQFunctionAssemblyData to mark as stale 921 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 922 923 @return An error code: 0 - success, otherwise - failure 924 925 @ref Backend 926 **/ 927 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 928 *is_update_needed = !data->reuse_data || data->needs_data_update; 929 return CEED_ERROR_SUCCESS; 930 } 931 932 /** 933 @brief Copy the pointer to a CeedQFunctionAssemblyData. 934 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 935 Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData. 936 This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData. 937 938 @param[in] data CeedQFunctionAssemblyData to copy reference to 939 @param[in,out] data_copy Variable to store copied reference 940 941 @return An error code: 0 - success, otherwise - failure 942 943 @ref Backend 944 **/ 945 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 946 CeedCall(CeedQFunctionAssemblyDataReference(data)); 947 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 948 *data_copy = data; 949 return CEED_ERROR_SUCCESS; 950 } 951 952 /** 953 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 954 955 @param[in] data CeedQFunctionAssemblyData to retrieve status 956 @param[out] is_setup Boolean flag for setup status 957 958 @return An error code: 0 - success, otherwise - failure 959 960 @ref Backend 961 **/ 962 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 963 *is_setup = data->is_setup; 964 return CEED_ERROR_SUCCESS; 965 } 966 967 /** 968 @brief Set internal objects for CeedQFunctionAssemblyData 969 970 @param[in,out] data CeedQFunctionAssemblyData to set objects 971 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 972 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 973 974 @return An error code: 0 - success, otherwise - failure 975 976 @ref Backend 977 **/ 978 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 979 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 980 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 981 982 data->is_setup = true; 983 return CEED_ERROR_SUCCESS; 984 } 985 986 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 987 if (!data->is_setup) { 988 // LCOV_EXCL_START 989 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 990 // LCOV_EXCL_STOP 991 } 992 993 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 994 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 995 996 return CEED_ERROR_SUCCESS; 997 } 998 999 /** 1000 @brief Destroy CeedQFunctionAssemblyData 1001 1002 @param[in,out] data CeedQFunctionAssemblyData to destroy 1003 1004 @return An error code: 0 - success, otherwise - failure 1005 1006 @ref Backend 1007 **/ 1008 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1009 if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS; 1010 1011 CeedCall(CeedDestroy(&(*data)->ceed)); 1012 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1013 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1014 1015 CeedCall(CeedFree(data)); 1016 return CEED_ERROR_SUCCESS; 1017 } 1018 1019 /** 1020 @brief Get CeedOperatorAssemblyData 1021 1022 @param[in] op CeedOperator to assemble 1023 @param[out] data CeedQFunctionAssemblyData 1024 1025 @return An error code: 0 - success, otherwise - failure 1026 1027 @ref Backend 1028 **/ 1029 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1030 if (!op->op_assembled) { 1031 CeedOperatorAssemblyData data; 1032 1033 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1034 op->op_assembled = data; 1035 } 1036 *data = op->op_assembled; 1037 1038 return CEED_ERROR_SUCCESS; 1039 } 1040 1041 /** 1042 @brief Create object holding CeedOperator assembly data 1043 1044 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1045 @param[in] op CeedOperator to be assembled 1046 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1047 1048 @return An error code: 0 - success, otherwise - failure 1049 1050 @ref Backend 1051 **/ 1052 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1053 CeedCall(CeedCalloc(1, data)); 1054 (*data)->ceed = ceed; 1055 CeedCall(CeedReference(ceed)); 1056 1057 // Build OperatorAssembly data 1058 CeedQFunction qf; 1059 CeedQFunctionField *qf_fields; 1060 CeedOperatorField *op_fields; 1061 CeedInt num_input_fields; 1062 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1063 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1064 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1065 1066 // Determine active input basis 1067 CeedInt num_eval_mode_in = 0, dim = 1; 1068 CeedEvalMode *eval_mode_in = NULL; 1069 CeedBasis basis_in = NULL; 1070 for (CeedInt i = 0; i < num_input_fields; i++) { 1071 CeedVector vec; 1072 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1073 if (vec == CEED_VECTOR_ACTIVE) { 1074 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1075 CeedCall(CeedBasisGetDimension(basis_in, &dim)); 1076 CeedEvalMode eval_mode; 1077 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1078 switch (eval_mode) { 1079 case CEED_EVAL_NONE: 1080 case CEED_EVAL_INTERP: 1081 CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in)); 1082 eval_mode_in[num_eval_mode_in] = eval_mode; 1083 num_eval_mode_in += 1; 1084 break; 1085 case CEED_EVAL_GRAD: 1086 CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in)); 1087 for (CeedInt d = 0; d < dim; d++) { 1088 eval_mode_in[num_eval_mode_in + d] = eval_mode; 1089 } 1090 num_eval_mode_in += dim; 1091 break; 1092 case CEED_EVAL_WEIGHT: 1093 case CEED_EVAL_DIV: 1094 case CEED_EVAL_CURL: 1095 break; // Caught by QF Assembly 1096 } 1097 } 1098 } 1099 (*data)->num_eval_mode_in = num_eval_mode_in; 1100 (*data)->eval_mode_in = eval_mode_in; 1101 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in)); 1102 1103 // Determine active output basis 1104 CeedInt num_output_fields; 1105 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1106 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1107 CeedInt num_eval_mode_out = 0; 1108 CeedEvalMode *eval_mode_out = NULL; 1109 CeedBasis basis_out = NULL; 1110 for (CeedInt i = 0; i < num_output_fields; i++) { 1111 CeedVector vec; 1112 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1113 if (vec == CEED_VECTOR_ACTIVE) { 1114 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1115 CeedEvalMode eval_mode; 1116 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1117 switch (eval_mode) { 1118 case CEED_EVAL_NONE: 1119 case CEED_EVAL_INTERP: 1120 CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out)); 1121 eval_mode_out[num_eval_mode_out] = eval_mode; 1122 num_eval_mode_out += 1; 1123 break; 1124 case CEED_EVAL_GRAD: 1125 CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out)); 1126 for (CeedInt d = 0; d < dim; d++) { 1127 eval_mode_out[num_eval_mode_out + d] = eval_mode; 1128 } 1129 num_eval_mode_out += dim; 1130 break; 1131 case CEED_EVAL_WEIGHT: 1132 case CEED_EVAL_DIV: 1133 case CEED_EVAL_CURL: 1134 break; // Caught by QF Assembly 1135 } 1136 } 1137 } 1138 (*data)->num_eval_mode_out = num_eval_mode_out; 1139 (*data)->eval_mode_out = eval_mode_out; 1140 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out)); 1141 1142 return CEED_ERROR_SUCCESS; 1143 } 1144 1145 /** 1146 @brief Get CeedOperator CeedEvalModes for assembly 1147 1148 @param[in] data CeedOperatorAssemblyData 1149 @param[out] num_eval_mode_in Pointer to hold number of input CeedEvalModes, or NULL 1150 @param[out] eval_mode_in Pointer to hold input CeedEvalModes, or NULL 1151 @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL 1152 @param[out] eval_mode_out Pointer to hold output CeedEvalModes, or NULL 1153 1154 @return An error code: 0 - success, otherwise - failure 1155 1156 @ref Backend 1157 **/ 1158 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in, 1159 CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) { 1160 if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in; 1161 if (eval_mode_in) *eval_mode_in = data->eval_mode_in; 1162 if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out; 1163 if (eval_mode_out) *eval_mode_out = data->eval_mode_out; 1164 1165 return CEED_ERROR_SUCCESS; 1166 } 1167 1168 /** 1169 @brief Get CeedOperator CeedBasis data for assembly 1170 1171 @param[in] data CeedOperatorAssemblyData 1172 @param[out] basis_in Pointer to hold active input CeedBasis, or NULL 1173 @param[out] B_in Pointer to hold assembled active input B, or NULL 1174 @param[out] basis_out Pointer to hold active output CeedBasis, or NULL 1175 @param[out] B_out Pointer to hold assembled active output B, or NULL 1176 1177 @return An error code: 0 - success, otherwise - failure 1178 1179 @ref Backend 1180 **/ 1181 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out, 1182 const CeedScalar **B_out) { 1183 // Assemble B_in, B_out if needed 1184 if (B_in && !data->B_in) { 1185 CeedInt num_qpts, elem_size; 1186 CeedScalar *B_in, *identity = NULL; 1187 const CeedScalar *interp_in, *grad_in; 1188 bool has_eval_none = false; 1189 1190 CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts)); 1191 CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size)); 1192 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in)); 1193 1194 for (CeedInt i = 0; i < data->num_eval_mode_in; i++) { 1195 has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE); 1196 } 1197 if (has_eval_none) { 1198 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1199 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1200 identity[i * elem_size + i] = 1.0; 1201 } 1202 } 1203 CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in)); 1204 CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in)); 1205 1206 for (CeedInt q = 0; q < num_qpts; q++) { 1207 for (CeedInt n = 0; n < elem_size; n++) { 1208 CeedInt d_in = -1; 1209 for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) { 1210 const CeedInt qq = data->num_eval_mode_in * q; 1211 const CeedScalar *b = NULL; 1212 1213 if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++; 1214 CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b); 1215 B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n]; 1216 } 1217 } 1218 } 1219 data->B_in = B_in; 1220 } 1221 1222 if (B_out && !data->B_out) { 1223 CeedInt num_qpts, elem_size; 1224 CeedScalar *B_out, *identity = NULL; 1225 const CeedScalar *interp_out, *grad_out; 1226 bool has_eval_none = false; 1227 1228 CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts)); 1229 CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size)); 1230 CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out)); 1231 1232 for (CeedInt i = 0; i < data->num_eval_mode_out; i++) { 1233 has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE); 1234 } 1235 if (has_eval_none) { 1236 CeedCall(CeedCalloc(num_qpts * elem_size, &identity)); 1237 for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) { 1238 identity[i * elem_size + i] = 1.0; 1239 } 1240 } 1241 CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out)); 1242 CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out)); 1243 1244 for (CeedInt q = 0; q < num_qpts; q++) { 1245 for (CeedInt n = 0; n < elem_size; n++) { 1246 CeedInt d_out = -1; 1247 for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) { 1248 const CeedInt qq = data->num_eval_mode_out * q; 1249 const CeedScalar *b = NULL; 1250 1251 if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++; 1252 CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b); 1253 B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n]; 1254 } 1255 } 1256 } 1257 data->B_out = B_out; 1258 } 1259 1260 if (basis_in) *basis_in = data->basis_in; 1261 if (B_in) *B_in = data->B_in; 1262 if (basis_out) *basis_out = data->basis_out; 1263 if (B_out) *B_out = data->B_out; 1264 1265 return CEED_ERROR_SUCCESS; 1266 } 1267 1268 /** 1269 @brief Destroy CeedOperatorAssemblyData 1270 1271 @param[in,out] data CeedOperatorAssemblyData to destroy 1272 1273 @return An error code: 0 - success, otherwise - failure 1274 1275 @ref Backend 1276 **/ 1277 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1278 if (!*data) return CEED_ERROR_SUCCESS; 1279 1280 CeedCall(CeedDestroy(&(*data)->ceed)); 1281 CeedCall(CeedBasisDestroy(&(*data)->basis_in)); 1282 CeedCall(CeedBasisDestroy(&(*data)->basis_out)); 1283 CeedCall(CeedFree(&(*data)->eval_mode_in)); 1284 CeedCall(CeedFree(&(*data)->eval_mode_out)); 1285 CeedCall(CeedFree(&(*data)->B_in)); 1286 CeedCall(CeedFree(&(*data)->B_out)); 1287 1288 CeedCall(CeedFree(data)); 1289 return CEED_ERROR_SUCCESS; 1290 } 1291 1292 /// @} 1293 1294 /// ---------------------------------------------------------------------------- 1295 /// CeedOperator Public API 1296 /// ---------------------------------------------------------------------------- 1297 /// @addtogroup CeedOperatorUser 1298 /// @{ 1299 1300 /** 1301 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1302 1303 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1304 The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices 1305 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the 1306 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, 1307 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] 1308 and producing the output [dv_0, dv_1, v]. 1309 1310 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1311 1312 @param[in] op CeedOperator to assemble CeedQFunction 1313 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1314 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1315 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1316 1317 @return An error code: 0 - success, otherwise - failure 1318 1319 @ref User 1320 **/ 1321 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1322 CeedCall(CeedOperatorCheckReady(op)); 1323 1324 if (op->LinearAssembleQFunction) { 1325 // Backend version 1326 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1327 } else { 1328 // Operator fallback 1329 CeedOperator op_fallback; 1330 1331 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1332 if (op_fallback) { 1333 CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1334 } else { 1335 // LCOV_EXCL_START 1336 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1337 // LCOV_EXCL_STOP 1338 } 1339 } 1340 return CEED_ERROR_SUCCESS; 1341 } 1342 1343 /** 1344 @brief Assemble CeedQFunction and store result internally. 1345 Return copied references of stored data to the caller. 1346 Caller is responsible for ownership and destruction of the copied references. 1347 See also @ref CeedOperatorLinearAssembleQFunction 1348 1349 @param[in] op CeedOperator to assemble CeedQFunction 1350 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1351 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1352 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1353 1354 @return An error code: 0 - success, otherwise - failure 1355 1356 @ref User 1357 **/ 1358 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1359 CeedCall(CeedOperatorCheckReady(op)); 1360 1361 if (op->LinearAssembleQFunctionUpdate) { 1362 // Backend version 1363 bool qf_assembled_is_setup; 1364 CeedVector assembled_vec = NULL; 1365 CeedElemRestriction assembled_rstr = NULL; 1366 1367 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1368 if (qf_assembled_is_setup) { 1369 bool update_needed; 1370 1371 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1372 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1373 if (update_needed) { 1374 CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request)); 1375 } 1376 } else { 1377 CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request)); 1378 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1379 } 1380 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1381 1382 // Copy reference from internally held copy 1383 *assembled = NULL; 1384 *rstr = NULL; 1385 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1386 CeedCall(CeedVectorDestroy(&assembled_vec)); 1387 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1388 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1389 } else { 1390 // Operator fallback 1391 CeedOperator op_fallback; 1392 1393 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1394 if (op_fallback) { 1395 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1396 } else { 1397 // LCOV_EXCL_START 1398 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1399 // LCOV_EXCL_STOP 1400 } 1401 } 1402 1403 return CEED_ERROR_SUCCESS; 1404 } 1405 1406 /** 1407 @brief Assemble the diagonal of a square linear CeedOperator 1408 1409 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1410 1411 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1412 1413 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1414 1415 @param[in] op CeedOperator to assemble CeedQFunction 1416 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1417 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1418 1419 @return An error code: 0 - success, otherwise - failure 1420 1421 @ref User 1422 **/ 1423 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1424 CeedCall(CeedOperatorCheckReady(op)); 1425 1426 CeedSize input_size = 0, output_size = 0; 1427 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1428 if (input_size != output_size) { 1429 // LCOV_EXCL_START 1430 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1431 // LCOV_EXCL_STOP 1432 } 1433 1434 if (op->LinearAssembleDiagonal) { 1435 // Backend version 1436 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1437 return CEED_ERROR_SUCCESS; 1438 } else if (op->LinearAssembleAddDiagonal) { 1439 // Backend version with zeroing first 1440 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1441 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1442 return CEED_ERROR_SUCCESS; 1443 } else { 1444 // Operator fallback 1445 CeedOperator op_fallback; 1446 1447 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1448 if (op_fallback) { 1449 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1450 return CEED_ERROR_SUCCESS; 1451 } 1452 } 1453 // Default interface implementation 1454 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1455 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1456 1457 return CEED_ERROR_SUCCESS; 1458 } 1459 1460 /** 1461 @brief Assemble the diagonal of a square linear CeedOperator 1462 1463 This sums into a CeedVector the diagonal of a linear CeedOperator. 1464 1465 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1466 1467 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1468 1469 @param[in] op CeedOperator to assemble CeedQFunction 1470 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1471 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1472 1473 @return An error code: 0 - success, otherwise - failure 1474 1475 @ref User 1476 **/ 1477 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1478 CeedCall(CeedOperatorCheckReady(op)); 1479 1480 CeedSize input_size = 0, output_size = 0; 1481 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1482 if (input_size != output_size) { 1483 // LCOV_EXCL_START 1484 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1485 // LCOV_EXCL_STOP 1486 } 1487 1488 if (op->LinearAssembleAddDiagonal) { 1489 // Backend version 1490 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1491 return CEED_ERROR_SUCCESS; 1492 } else { 1493 // Operator fallback 1494 CeedOperator op_fallback; 1495 1496 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1497 if (op_fallback) { 1498 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1499 return CEED_ERROR_SUCCESS; 1500 } 1501 } 1502 // Default interface implementation 1503 bool is_composite; 1504 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1505 if (is_composite) { 1506 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1507 } else { 1508 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1509 } 1510 1511 return CEED_ERROR_SUCCESS; 1512 } 1513 1514 /** 1515 @brief Assemble the point block diagonal of a square linear CeedOperator 1516 1517 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 1518 1519 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1520 1521 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1522 1523 @param[in] op CeedOperator to assemble CeedQFunction 1524 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1525 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1526 component in]. 1527 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1528 1529 @return An error code: 0 - success, otherwise - failure 1530 1531 @ref User 1532 **/ 1533 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1534 CeedCall(CeedOperatorCheckReady(op)); 1535 1536 CeedSize input_size = 0, output_size = 0; 1537 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1538 if (input_size != output_size) { 1539 // LCOV_EXCL_START 1540 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1541 // LCOV_EXCL_STOP 1542 } 1543 1544 if (op->LinearAssemblePointBlockDiagonal) { 1545 // Backend version 1546 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 1547 return CEED_ERROR_SUCCESS; 1548 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1549 // Backend version with zeroing first 1550 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1551 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1552 return CEED_ERROR_SUCCESS; 1553 } else { 1554 // Operator fallback 1555 CeedOperator op_fallback; 1556 1557 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1558 if (op_fallback) { 1559 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 1560 return CEED_ERROR_SUCCESS; 1561 } 1562 } 1563 // Default interface implementation 1564 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1565 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1566 1567 return CEED_ERROR_SUCCESS; 1568 } 1569 1570 /** 1571 @brief Assemble the point block diagonal of a square linear CeedOperator 1572 1573 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 1574 1575 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1576 1577 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1578 1579 @param[in] op CeedOperator to assemble CeedQFunction 1580 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 1581 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 1582 component in]. 1583 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1584 1585 @return An error code: 0 - success, otherwise - failure 1586 1587 @ref User 1588 **/ 1589 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1590 CeedCall(CeedOperatorCheckReady(op)); 1591 1592 CeedSize input_size = 0, output_size = 0; 1593 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1594 if (input_size != output_size) { 1595 // LCOV_EXCL_START 1596 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1597 // LCOV_EXCL_STOP 1598 } 1599 1600 if (op->LinearAssembleAddPointBlockDiagonal) { 1601 // Backend version 1602 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 1603 return CEED_ERROR_SUCCESS; 1604 } else { 1605 // Operator fallback 1606 CeedOperator op_fallback; 1607 1608 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1609 if (op_fallback) { 1610 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 1611 return CEED_ERROR_SUCCESS; 1612 } 1613 } 1614 // Default interface implementation 1615 bool is_composite; 1616 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1617 if (is_composite) { 1618 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 1619 } else { 1620 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 1621 } 1622 1623 return CEED_ERROR_SUCCESS; 1624 } 1625 1626 /** 1627 @brief Fully assemble the nonzero pattern of a linear operator. 1628 1629 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 1630 1631 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1632 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j) 1633 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 1634 1635 This will generally be slow unless your operator is low-order. 1636 1637 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1638 1639 @param[in] op CeedOperator to assemble 1640 @param[out] num_entries Number of entries in coordinate nonzero pattern 1641 @param[out] rows Row number for each entry 1642 @param[out] cols Column number for each entry 1643 1644 @ref User 1645 **/ 1646 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1647 CeedInt num_suboperators, single_entries; 1648 CeedOperator *sub_operators; 1649 bool is_composite; 1650 CeedCall(CeedOperatorCheckReady(op)); 1651 1652 if (op->LinearAssembleSymbolic) { 1653 // Backend version 1654 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 1655 return CEED_ERROR_SUCCESS; 1656 } else { 1657 // Operator fallback 1658 CeedOperator op_fallback; 1659 1660 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1661 if (op_fallback) { 1662 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 1663 return CEED_ERROR_SUCCESS; 1664 } 1665 } 1666 1667 // Default interface implementation 1668 1669 // count entries and allocate rows, cols arrays 1670 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1671 *num_entries = 0; 1672 if (is_composite) { 1673 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1674 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1675 for (CeedInt k = 0; k < num_suboperators; ++k) { 1676 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1677 *num_entries += single_entries; 1678 } 1679 } else { 1680 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 1681 *num_entries += single_entries; 1682 } 1683 CeedCall(CeedCalloc(*num_entries, rows)); 1684 CeedCall(CeedCalloc(*num_entries, cols)); 1685 1686 // assemble nonzero locations 1687 CeedInt offset = 0; 1688 if (is_composite) { 1689 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1690 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1691 for (CeedInt k = 0; k < num_suboperators; ++k) { 1692 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 1693 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1694 offset += single_entries; 1695 } 1696 } else { 1697 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 1698 } 1699 1700 return CEED_ERROR_SUCCESS; 1701 } 1702 1703 /** 1704 @brief Fully assemble the nonzero entries of a linear operator. 1705 1706 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 1707 1708 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 1709 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added, 1710 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1711 1712 This will generally be slow unless your operator is low-order. 1713 1714 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1715 1716 @param[in] op CeedOperator to assemble 1717 @param[out] values Values to assemble into matrix 1718 1719 @ref User 1720 **/ 1721 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1722 CeedInt num_suboperators, single_entries = 0; 1723 CeedOperator *sub_operators; 1724 CeedCall(CeedOperatorCheckReady(op)); 1725 1726 if (op->LinearAssemble) { 1727 // Backend version 1728 CeedCall(op->LinearAssemble(op, values)); 1729 return CEED_ERROR_SUCCESS; 1730 } else { 1731 // Operator fallback 1732 CeedOperator op_fallback; 1733 1734 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1735 if (op_fallback) { 1736 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 1737 return CEED_ERROR_SUCCESS; 1738 } 1739 } 1740 1741 // Default interface implementation 1742 bool is_composite; 1743 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1744 1745 CeedInt offset = 0; 1746 if (is_composite) { 1747 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 1748 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1749 for (CeedInt k = 0; k < num_suboperators; k++) { 1750 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 1751 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 1752 offset += single_entries; 1753 } 1754 } else { 1755 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 1756 } 1757 1758 return CEED_ERROR_SUCCESS; 1759 } 1760 1761 /** 1762 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 1763 1764 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1765 1766 @param[in] op Composite CeedOperator 1767 @param[in] num_skip_indices Number of suboperators to skip 1768 @param[in] skip_indices Array of indices of suboperators to skip 1769 @param[out] mult Vector to store multiplicity (of size l_size) 1770 1771 @return An error code: 0 - success, otherwise - failure 1772 1773 @ref User 1774 **/ 1775 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 1776 CeedCall(CeedOperatorCheckReady(op)); 1777 1778 Ceed ceed; 1779 CeedInt num_sub_ops; 1780 CeedSize l_vec_len; 1781 CeedScalar *mult_array; 1782 CeedVector ones_l_vec; 1783 CeedElemRestriction elem_restr; 1784 CeedOperator *sub_ops; 1785 1786 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1787 1788 // Zero mult vector 1789 CeedCall(CeedVectorSetValue(mult, 0.0)); 1790 1791 // Get suboperators 1792 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_ops)); 1793 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_ops)); 1794 if (num_sub_ops == 0) return CEED_ERROR_SUCCESS; 1795 1796 // Work vector 1797 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 1798 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 1799 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 1800 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 1801 1802 // Compute multiplicity across suboperators 1803 for (CeedInt i = 0; i < num_sub_ops; i++) { 1804 const CeedScalar *sub_mult_array; 1805 CeedVector sub_mult_l_vec, ones_e_vec; 1806 1807 // -- Check for suboperator to skip 1808 for (CeedInt j = 0; j < num_skip_indices; j++) { 1809 if (skip_indices[j] == i) continue; 1810 } 1811 1812 // -- Sub operator multiplicity 1813 CeedCall(CeedOperatorGetActiveElemRestriction(sub_ops[i], &elem_restr)); 1814 CeedCall(CeedElemRestrictionCreateVector(elem_restr, &sub_mult_l_vec, &ones_e_vec)); 1815 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 1816 CeedCall(CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 1817 CeedCall(CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 1818 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 1819 // ---- Flag every node present in the current suboperator 1820 for (CeedInt j = 0; j < l_vec_len; j++) { 1821 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 1822 } 1823 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 1824 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 1825 CeedCall(CeedVectorDestroy(&ones_e_vec)); 1826 } 1827 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 1828 CeedCall(CeedVectorDestroy(&ones_l_vec)); 1829 1830 return CEED_ERROR_SUCCESS; 1831 } 1832 1833 /** 1834 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 1835 grid interpolation 1836 1837 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 1838 1839 @param[in] op_fine Fine grid operator 1840 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1841 @param[in] rstr_coarse Coarse grid restriction 1842 @param[in] basis_coarse Coarse grid active vector basis 1843 @param[out] op_coarse Coarse grid operator 1844 @param[out] op_prolong Coarse to fine operator 1845 @param[out] op_restrict Fine to coarse operator 1846 1847 @return An error code: 0 - success, otherwise - failure 1848 1849 @ref User 1850 **/ 1851 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1852 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 1853 CeedCall(CeedOperatorCheckReady(op_fine)); 1854 1855 // Build prolongation matrix 1856 CeedBasis basis_fine, basis_c_to_f; 1857 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 1858 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 1859 1860 // Core code 1861 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 1862 1863 return CEED_ERROR_SUCCESS; 1864 } 1865 1866 /** 1867 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 1868 1869 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 1870 1871 @param[in] op_fine Fine grid operator 1872 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1873 @param[in] rstr_coarse Coarse grid restriction 1874 @param[in] basis_coarse Coarse grid active vector basis 1875 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1876 @param[out] op_coarse Coarse grid operator 1877 @param[out] op_prolong Coarse to fine operator 1878 @param[out] op_restrict Fine to coarse operator 1879 1880 @return An error code: 0 - success, otherwise - failure 1881 1882 @ref User 1883 **/ 1884 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1885 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 1886 CeedOperator *op_restrict) { 1887 CeedCall(CeedOperatorCheckReady(op_fine)); 1888 Ceed ceed; 1889 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 1890 1891 // Check for compatible quadrature spaces 1892 CeedBasis basis_fine; 1893 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 1894 CeedInt Q_f, Q_c; 1895 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 1896 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 1897 if (Q_f != Q_c) { 1898 // LCOV_EXCL_START 1899 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 1900 // LCOV_EXCL_STOP 1901 } 1902 1903 // Coarse to fine basis 1904 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1905 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 1906 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 1907 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 1908 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 1909 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 1910 CeedScalar *q_ref, *q_weight, *grad; 1911 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 1912 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 1913 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 1914 CeedBasis basis_c_to_f; 1915 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 1916 CeedCall(CeedFree(&q_ref)); 1917 CeedCall(CeedFree(&q_weight)); 1918 CeedCall(CeedFree(&grad)); 1919 1920 // Core code 1921 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 1922 return CEED_ERROR_SUCCESS; 1923 } 1924 1925 /** 1926 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 1927 1928 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 1929 1930 @param[in] op_fine Fine grid operator 1931 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1932 @param[in] rstr_coarse Coarse grid restriction 1933 @param[in] basis_coarse Coarse grid active vector basis 1934 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1935 @param[out] op_coarse Coarse grid operator 1936 @param[out] op_prolong Coarse to fine operator 1937 @param[out] op_restrict Fine to coarse operator 1938 1939 @return An error code: 0 - success, otherwise - failure 1940 1941 @ref User 1942 **/ 1943 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1944 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 1945 CeedOperator *op_restrict) { 1946 CeedCall(CeedOperatorCheckReady(op_fine)); 1947 Ceed ceed; 1948 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 1949 1950 // Check for compatible quadrature spaces 1951 CeedBasis basis_fine; 1952 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 1953 CeedInt Q_f, Q_c; 1954 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 1955 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 1956 if (Q_f != Q_c) { 1957 // LCOV_EXCL_START 1958 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 1959 // LCOV_EXCL_STOP 1960 } 1961 1962 // Coarse to fine basis 1963 CeedElemTopology topo; 1964 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 1965 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1966 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 1967 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 1968 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 1969 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 1970 CeedScalar *q_ref, *q_weight, *grad; 1971 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 1972 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 1973 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 1974 CeedBasis basis_c_to_f; 1975 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 1976 CeedCall(CeedFree(&q_ref)); 1977 CeedCall(CeedFree(&q_weight)); 1978 CeedCall(CeedFree(&grad)); 1979 1980 // Core code 1981 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 1982 return CEED_ERROR_SUCCESS; 1983 } 1984 1985 /** 1986 @brief Build a FDM based approximate inverse for each element for a CeedOperator 1987 1988 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 1989 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V. 1990 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T 1991 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear. 1992 1993 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1994 1995 @param[in] op CeedOperator to create element inverses 1996 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 1997 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1998 1999 @return An error code: 0 - success, otherwise - failure 2000 2001 @ref User 2002 **/ 2003 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2004 CeedCall(CeedOperatorCheckReady(op)); 2005 2006 if (op->CreateFDMElementInverse) { 2007 // Backend version 2008 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2009 return CEED_ERROR_SUCCESS; 2010 } else { 2011 // Operator fallback 2012 CeedOperator op_fallback; 2013 2014 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2015 if (op_fallback) { 2016 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2017 return CEED_ERROR_SUCCESS; 2018 } 2019 } 2020 2021 // Default interface implementation 2022 Ceed ceed, ceed_parent; 2023 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2024 CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent)); 2025 ceed_parent = ceed_parent ? ceed_parent : ceed; 2026 CeedQFunction qf; 2027 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2028 2029 // Determine active input basis 2030 bool interp = false, grad = false; 2031 CeedBasis basis = NULL; 2032 CeedElemRestriction rstr = NULL; 2033 CeedOperatorField *op_fields; 2034 CeedQFunctionField *qf_fields; 2035 CeedInt num_input_fields; 2036 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2037 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2038 for (CeedInt i = 0; i < num_input_fields; i++) { 2039 CeedVector vec; 2040 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2041 if (vec == CEED_VECTOR_ACTIVE) { 2042 CeedEvalMode eval_mode; 2043 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2044 interp = interp || eval_mode == CEED_EVAL_INTERP; 2045 grad = grad || eval_mode == CEED_EVAL_GRAD; 2046 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2047 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2048 } 2049 } 2050 if (!basis) { 2051 // LCOV_EXCL_START 2052 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2053 // LCOV_EXCL_STOP 2054 } 2055 CeedSize l_size = 1; 2056 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2057 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2058 CeedCall(CeedBasisGetNumNodes(basis, &elem_size)); 2059 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2060 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2061 CeedCall(CeedBasisGetDimension(basis, &dim)); 2062 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2063 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2064 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2065 2066 // Build and diagonalize 1D Mass and Laplacian 2067 bool tensor_basis; 2068 CeedCall(CeedBasisIsTensor(basis, &tensor_basis)); 2069 if (!tensor_basis) { 2070 // LCOV_EXCL_START 2071 return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2072 // LCOV_EXCL_STOP 2073 } 2074 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2075 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2076 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2077 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2078 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2079 CeedCall(CeedCalloc(P_1d, &lambda)); 2080 // -- Build matrices 2081 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2082 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2083 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2084 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2085 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2086 2087 // -- Diagonalize 2088 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2089 CeedCall(CeedFree(&mass)); 2090 CeedCall(CeedFree(&laplace)); 2091 for (CeedInt i = 0; i < P_1d; i++) { 2092 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2093 } 2094 CeedCall(CeedFree(&x)); 2095 2096 // Assemble QFunction 2097 CeedVector assembled; 2098 CeedElemRestriction rstr_qf; 2099 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2100 CeedInt layout[3]; 2101 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2102 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2103 CeedScalar max_norm = 0; 2104 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2105 2106 // Calculate element averages 2107 CeedInt num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2108 CeedScalar *elem_avg; 2109 const CeedScalar *assembled_array, *q_weight_array; 2110 CeedVector q_weight; 2111 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2112 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2113 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2114 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2115 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2116 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2117 for (CeedInt e = 0; e < num_elem; e++) { 2118 CeedInt count = 0; 2119 for (CeedInt q = 0; q < num_qpts; q++) { 2120 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2121 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2122 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2123 count++; 2124 } 2125 } 2126 } 2127 if (count) { 2128 elem_avg[e] /= count; 2129 } else { 2130 elem_avg[e] = 1.0; 2131 } 2132 } 2133 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2134 CeedCall(CeedVectorDestroy(&assembled)); 2135 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2136 CeedCall(CeedVectorDestroy(&q_weight)); 2137 2138 // Build FDM diagonal 2139 CeedVector q_data; 2140 CeedScalar *q_data_array, *fdm_diagonal; 2141 CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal)); 2142 const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON; 2143 for (CeedInt c = 0; c < num_comp; c++) { 2144 for (CeedInt n = 0; n < elem_size; n++) { 2145 if (interp) fdm_diagonal[c * elem_size + n] = 1.0; 2146 if (grad) { 2147 for (CeedInt d = 0; d < dim; d++) { 2148 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2149 fdm_diagonal[c * elem_size + n] += lambda[i]; 2150 } 2151 } 2152 if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound; 2153 } 2154 } 2155 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data)); 2156 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2157 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2158 for (CeedInt e = 0; e < num_elem; e++) { 2159 for (CeedInt c = 0; c < num_comp; c++) { 2160 for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]); 2161 } 2162 } 2163 CeedCall(CeedFree(&elem_avg)); 2164 CeedCall(CeedFree(&fdm_diagonal)); 2165 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2166 2167 // Setup FDM operator 2168 // -- Basis 2169 CeedBasis fdm_basis; 2170 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2171 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2172 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2173 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2174 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2175 CeedCall(CeedFree(&fdm_interp)); 2176 CeedCall(CeedFree(&grad_dummy)); 2177 CeedCall(CeedFree(&q_ref_dummy)); 2178 CeedCall(CeedFree(&q_weight_dummy)); 2179 CeedCall(CeedFree(&lambda)); 2180 2181 // -- Restriction 2182 CeedElemRestriction rstr_qd_i; 2183 CeedInt strides[3] = {1, elem_size, elem_size * num_comp}; 2184 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i)); 2185 // -- QFunction 2186 CeedQFunction qf_fdm; 2187 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2188 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2189 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2190 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2191 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2192 // -- QFunction context 2193 CeedInt *num_comp_data; 2194 CeedCall(CeedCalloc(1, &num_comp_data)); 2195 num_comp_data[0] = num_comp; 2196 CeedQFunctionContext ctx_fdm; 2197 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2198 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2199 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2200 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2201 // -- Operator 2202 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2203 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2204 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data)); 2205 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2206 2207 // Cleanup 2208 CeedCall(CeedVectorDestroy(&q_data)); 2209 CeedCall(CeedBasisDestroy(&fdm_basis)); 2210 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2211 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2212 2213 return CEED_ERROR_SUCCESS; 2214 } 2215 2216 /// @} 2217