1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a `CeedQFunction` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality 28 29 @param[in] fallback_ceed `Ceed` on which to create fallback `CeedQFunction` 30 @param[in] qf `CeedQFunction` to create fallback for 31 @param[out] qf_fallback Fallback `CeedQFunction` 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 char *source_path_with_name = NULL; 39 CeedInt num_input_fields, num_output_fields; 40 Ceed ceed; 41 CeedQFunctionField *input_fields, *output_fields; 42 43 // Check if NULL qf passed in 44 if (!qf) return CEED_ERROR_SUCCESS; 45 46 CeedCall(CeedQFunctionGetCeed(qf, &ceed)); 47 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 48 CeedDebug(ceed, "Creating fallback CeedQFunction\n"); 49 50 if (qf->source_path) { 51 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 52 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 53 memcpy(source_path_with_name, qf->source_path, path_len); 54 memcpy(&source_path_with_name[path_len], ":", 1); 55 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 56 } else { 57 CeedCall(CeedCalloc(1, &source_path_with_name)); 58 } 59 60 { 61 CeedInt vec_length; 62 CeedQFunctionUser f; 63 64 CeedCall(CeedQFunctionGetVectorLength(qf, &vec_length)); 65 CeedCall(CeedQFunctionGetUserFunction(qf, &f)); 66 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, vec_length, f, source_path_with_name, qf_fallback)); 67 } 68 { 69 CeedQFunctionContext ctx; 70 71 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 72 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 73 } 74 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 75 for (CeedInt i = 0; i < num_input_fields; i++) { 76 const char *field_name; 77 CeedInt size; 78 CeedEvalMode eval_mode; 79 80 CeedCall(CeedQFunctionFieldGetData(input_fields[i], &field_name, &size, &eval_mode)); 81 CeedCall(CeedQFunctionAddInput(*qf_fallback, field_name, size, eval_mode)); 82 } 83 for (CeedInt i = 0; i < num_output_fields; i++) { 84 const char *field_name; 85 CeedInt size; 86 CeedEvalMode eval_mode; 87 88 CeedCall(CeedQFunctionFieldGetData(output_fields[i], &field_name, &size, &eval_mode)); 89 CeedCall(CeedQFunctionAddOutput(*qf_fallback, field_name, size, eval_mode)); 90 } 91 CeedCall(CeedFree(&source_path_with_name)); 92 return CEED_ERROR_SUCCESS; 93 } 94 95 /** 96 @brief Duplicate a `CeedOperator` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality 97 98 @param[in,out] op `CeedOperator` to create fallback for 99 100 @return An error code: 0 - success, otherwise - failure 101 102 @ref Developer 103 **/ 104 static int CeedOperatorCreateFallback(CeedOperator op) { 105 bool is_composite; 106 Ceed ceed, ceed_fallback; 107 CeedOperator op_fallback; 108 109 // Check not already created 110 if (op->op_fallback) return CEED_ERROR_SUCCESS; 111 112 // Fallback Ceed 113 CeedCall(CeedOperatorGetCeed(op, &ceed)); 114 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 115 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 116 117 CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); 118 CeedDebug(ceed, "Creating fallback CeedOperator\n"); 119 120 // Clone Op 121 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 122 if (is_composite) { 123 CeedInt num_suboperators; 124 CeedOperator *sub_operators; 125 126 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 127 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 128 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 129 for (CeedInt i = 0; i < num_suboperators; i++) { 130 CeedOperator op_sub_fallback; 131 132 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 133 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 134 } 135 } else { 136 CeedInt num_input_fields, num_output_fields; 137 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 138 CeedOperatorField *input_fields, *output_fields; 139 140 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 141 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 142 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 143 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 144 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 145 for (CeedInt i = 0; i < num_input_fields; i++) { 146 const char *field_name; 147 CeedVector vec; 148 CeedElemRestriction rstr; 149 CeedBasis basis; 150 151 CeedCall(CeedOperatorFieldGetData(input_fields[i], &field_name, &rstr, &basis, &vec)); 152 CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec)); 153 } 154 for (CeedInt i = 0; i < num_output_fields; i++) { 155 const char *field_name; 156 CeedVector vec; 157 CeedElemRestriction rstr; 158 CeedBasis basis; 159 160 CeedCall(CeedOperatorFieldGetData(output_fields[i], &field_name, &rstr, &basis, &vec)); 161 CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec)); 162 } 163 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 164 // Cleanup 165 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 166 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 167 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 168 } 169 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 170 CeedCall(CeedOperatorCheckReady(op_fallback)); 171 // Note: No ref-counting here so we don't get caught in a reference loop. 172 // The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback. 173 op->op_fallback = op_fallback; 174 op_fallback->op_fallback_parent = op; 175 return CEED_ERROR_SUCCESS; 176 } 177 178 /** 179 @brief Core logic for assembling operator diagonal or point block diagonal 180 181 @param[in] op `CeedOperator` to assemble diagonal or point block diagonal 182 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 183 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 184 @param[out] assembled `CeedVector` to store assembled diagonal 185 186 @return An error code: 0 - success, otherwise - failure 187 188 @ref Developer 189 **/ 190 static inline int CeedSingleOperatorLinearAssembleAddDiagonal_Mesh(CeedOperator op, CeedRequest *request, const bool is_point_block, 191 CeedVector assembled) { 192 Ceed ceed; 193 bool is_composite; 194 195 CeedCall(CeedOperatorGetCeed(op, &ceed)); 196 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 197 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 198 199 // Assemble QFunction 200 CeedInt layout_qf[3]; 201 const CeedScalar *assembled_qf_array; 202 CeedVector assembled_qf = NULL; 203 CeedElemRestriction assembled_elem_rstr = NULL; 204 205 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 206 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf)); 207 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 208 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 209 210 // Get assembly data 211 const CeedEvalMode **eval_modes_in, **eval_modes_out; 212 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 213 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 214 CeedBasis *active_bases_in, *active_bases_out; 215 CeedElemRestriction *active_elem_rstrs_in, *active_elem_rstrs_out; 216 CeedOperatorAssemblyData data; 217 218 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 219 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 220 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, 221 &num_output_components)); 222 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL)); 223 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out)); 224 225 // Loop over all active bases (find matching input/output pairs) 226 for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) { 227 CeedInt b_in, b_out, num_elem, num_nodes, num_qpts, num_comp; 228 bool has_eval_none = false; 229 CeedScalar *elem_diag_array, *identity = NULL; 230 CeedVector elem_diag; 231 CeedElemRestriction diag_elem_rstr; 232 233 if (num_active_bases_in <= num_active_bases_out) { 234 b_in = b; 235 for (b_out = 0; b_out < num_active_bases_out; b_out++) { 236 if (active_bases_in[b_in] == active_bases_out[b_out]) { 237 break; 238 } 239 } 240 if (b_out == num_active_bases_out) { 241 continue; 242 } // No matching output basis found 243 } else { 244 b_out = b; 245 for (b_in = 0; b_in < num_active_bases_in; b_in++) { 246 if (active_bases_in[b_in] == active_bases_out[b_out]) { 247 break; 248 } 249 } 250 if (b_in == num_active_bases_in) { 251 continue; 252 } // No matching output basis found 253 } 254 CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED, 255 "Cannot assemble operator diagonal with different input and output active element restrictions"); 256 257 // Assemble point block diagonal restriction, if needed 258 if (is_point_block) { 259 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 260 } else { 261 CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 262 } 263 264 // Create diagonal vector 265 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 266 267 // Assemble element operator diagonals 268 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 269 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 270 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 271 CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes)); 272 CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp)); 273 if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes; 274 else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts)); 275 276 // Construct identity matrix for basis if required 277 for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) { 278 has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE); 279 } 280 for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) { 281 has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE); 282 } 283 if (has_eval_none) { 284 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 285 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 286 } 287 288 // Compute the diagonal of B^T D B 289 // Each element 290 for (CeedSize e = 0; e < num_elem; e++) { 291 // Each basis eval mode pair 292 CeedInt d_out = 0, q_comp_out; 293 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 294 295 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) { 296 CeedInt d_in = 0, q_comp_in; 297 const CeedScalar *B_t = NULL; 298 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 299 300 CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t)); 301 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out)); 302 if (q_comp_out > 1) { 303 if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0; 304 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 305 } 306 eval_mode_out_prev = eval_modes_out[b_out][e_out]; 307 308 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) { 309 const CeedScalar *B = NULL; 310 311 CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B)); 312 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in)); 313 if (q_comp_in > 1) { 314 if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0; 315 else B = &B[(++d_in) * num_qpts * num_nodes]; 316 } 317 eval_mode_in_prev = eval_modes_in[b_in][e_in]; 318 319 // Each component 320 for (CeedInt c_out = 0; c_out < num_comp; c_out++) { 321 // Each qpt/node pair 322 for (CeedInt q = 0; q < num_qpts; q++) { 323 if (is_point_block) { 324 // Point Block Diagonal 325 for (CeedInt c_in = 0; c_in < num_comp; c_in++) { 326 const CeedSize c_offset = 327 (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 328 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 329 330 for (CeedInt n = 0; n < num_nodes; n++) { 331 elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] += 332 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 333 } 334 } 335 } else { 336 // Diagonal Only 337 const CeedInt c_offset = 338 (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 339 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 340 341 for (CeedInt n = 0; n < num_nodes; n++) { 342 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 343 } 344 } 345 } 346 } 347 } 348 } 349 } 350 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 351 352 // Assemble local operator diagonal 353 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 354 355 // Cleanup 356 CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 357 CeedCall(CeedVectorDestroy(&elem_diag)); 358 CeedCall(CeedFree(&identity)); 359 } 360 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 361 CeedCall(CeedVectorDestroy(&assembled_qf)); 362 return CEED_ERROR_SUCCESS; 363 } 364 365 /** 366 @brief Core logic for assembling operator diagonal or point block diagonal 367 368 @param[in] op `CeedOperator` to assemble diagonal or point block diagonal 369 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 370 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 371 @param[out] assembled `CeedVector` to store assembled diagonal 372 373 @return An error code: 0 - success, otherwise - failure 374 375 @ref Developer 376 **/ 377 static inline int CeedSingleOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block, 378 CeedVector assembled) { 379 Ceed ceed; 380 bool is_at_points; 381 382 CeedCall(CeedOperatorGetCeed(op, &ceed)); 383 CeedCall(CeedOperatorIsAtPoints(op, &is_at_points)); 384 CeedCheck(!is_at_points, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 385 CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal_Mesh(op, request, is_point_block, assembled)); 386 return CEED_ERROR_SUCCESS; 387 } 388 389 /** 390 @brief Core logic for assembling composite operator diagonal 391 392 @param[in] op `CeedOperator` to assemble point block diagonal 393 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 394 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 395 @param[out] assembled `CeedVector` to store assembled diagonal 396 397 @return An error code: 0 - success, otherwise - failure 398 399 @ref Developer 400 **/ 401 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block, 402 CeedVector assembled) { 403 CeedInt num_sub; 404 CeedOperator *suboperators; 405 406 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 407 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 408 for (CeedInt i = 0; i < num_sub; i++) { 409 if (is_point_block) { 410 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 411 } else { 412 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 413 } 414 } 415 return CEED_ERROR_SUCCESS; 416 } 417 418 /** 419 @brief Build nonzero pattern for non-composite CeedOperator`. 420 421 Users should generally use @ref CeedOperatorLinearAssembleSymbolic(). 422 423 @param[in] op `CeedOperator` to assemble nonzero pattern 424 @param[in] offset Offset for number of entries 425 @param[out] rows Row number for each entry 426 @param[out] cols Column number for each entry 427 428 @return An error code: 0 - success, otherwise - failure 429 430 @ref Developer 431 **/ 432 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 433 Ceed ceed; 434 bool is_composite; 435 CeedSize num_nodes_in, num_nodes_out, local_num_entries, count = 0; 436 CeedInt num_elem_in, elem_size_in, num_comp_in, layout_er_in[3]; 437 CeedInt num_elem_out, elem_size_out, num_comp_out, layout_er_out[3]; 438 CeedScalar *array; 439 const CeedScalar *elem_dof_a_in, *elem_dof_a_out; 440 CeedVector index_vec_in, index_vec_out, elem_dof_in, elem_dof_out; 441 CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out; 442 443 CeedCall(CeedOperatorGetCeed(op, &ceed)); 444 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 445 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 446 447 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out)); 448 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 449 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 450 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 451 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 452 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, layout_er_in)); 453 454 // Determine elem_dof relation for input 455 CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in)); 456 CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array)); 457 for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i; 458 CeedCall(CeedVectorRestoreArray(index_vec_in, &array)); 459 CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in)); 460 CeedCall(CeedVectorSetValue(elem_dof_in, 0.0)); 461 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in)); 462 CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE)); 463 CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in)); 464 CeedCall(CeedVectorDestroy(&index_vec_in)); 465 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in)); 466 467 if (elem_rstr_in != elem_rstr_out) { 468 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 469 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 470 "Active input and output operator restrictions must have the same number of elements"); 471 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 472 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 473 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, layout_er_out)); 474 475 // Determine elem_dof relation for output 476 CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out)); 477 CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array)); 478 for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i; 479 CeedCall(CeedVectorRestoreArray(index_vec_out, &array)); 480 CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out)); 481 CeedCall(CeedVectorSetValue(elem_dof_out, 0.0)); 482 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out)); 483 CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE)); 484 CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out)); 485 CeedCall(CeedVectorDestroy(&index_vec_out)); 486 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out)); 487 } else { 488 num_elem_out = num_elem_in; 489 elem_size_out = elem_size_in; 490 num_comp_out = num_comp_in; 491 layout_er_out[0] = layout_er_in[0]; 492 layout_er_out[1] = layout_er_in[1]; 493 layout_er_out[2] = layout_er_in[2]; 494 elem_dof_a_out = elem_dof_a_in; 495 } 496 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 497 498 // Determine i, j locations for element matrices 499 for (CeedInt e = 0; e < num_elem_in; e++) { 500 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 501 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 502 for (CeedInt i = 0; i < elem_size_out; i++) { 503 for (CeedInt j = 0; j < elem_size_in; j++) { 504 const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2]; 505 const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2]; 506 const CeedInt row = elem_dof_a_out[elem_dof_index_row]; 507 const CeedInt col = elem_dof_a_in[elem_dof_index_col]; 508 509 rows[offset + count] = row; 510 cols[offset + count] = col; 511 count++; 512 } 513 } 514 } 515 } 516 } 517 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in)); 519 CeedCall(CeedVectorDestroy(&elem_dof_in)); 520 if (elem_rstr_in != elem_rstr_out) { 521 CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out)); 522 CeedCall(CeedVectorDestroy(&elem_dof_out)); 523 } 524 return CEED_ERROR_SUCCESS; 525 } 526 527 /** 528 @brief Assemble nonzero entries for non-composite `CeedOperator`. 529 530 Users should generally use @ref CeedOperatorLinearAssemble(). 531 532 @param[in] op `CeedOperator` to assemble 533 @param[in] offset Offset for number of entries 534 @param[out] values Values to assemble into matrix 535 536 @return An error code: 0 - success, otherwise - failure 537 538 @ref Developer 539 **/ 540 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 541 Ceed ceed; 542 bool is_composite; 543 544 CeedCall(CeedOperatorGetCeed(op, &ceed)); 545 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 546 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 547 548 // Early exit for empty operator 549 { 550 CeedInt num_elem = 0; 551 552 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 553 if (num_elem == 0) return CEED_ERROR_SUCCESS; 554 } 555 556 if (op->LinearAssembleSingle) { 557 // Backend version 558 CeedCall(op->LinearAssembleSingle(op, offset, values)); 559 return CEED_ERROR_SUCCESS; 560 } else { 561 // Operator fallback 562 CeedOperator op_fallback; 563 564 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 565 if (op_fallback) { 566 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 567 return CEED_ERROR_SUCCESS; 568 } 569 } 570 571 // Assemble QFunction 572 CeedInt layout_qf[3]; 573 const CeedScalar *assembled_qf_array; 574 CeedVector assembled_qf = NULL; 575 CeedElemRestriction assembled_elem_rstr = NULL; 576 577 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE)); 578 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf)); 579 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 580 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 581 582 // Get assembly data 583 CeedInt num_elem_in, elem_size_in, num_comp_in, num_qpts_in; 584 CeedInt num_elem_out, elem_size_out, num_comp_out, num_qpts_out; 585 CeedSize local_num_entries, count = 0; 586 const CeedEvalMode **eval_modes_in, **eval_modes_out; 587 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 588 CeedBasis *active_bases_in, *active_bases_out, basis_in, basis_out; 589 const CeedScalar **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out; 590 CeedElemRestriction elem_rstr_in, elem_rstr_out; 591 CeedRestrictionType elem_rstr_type_in, elem_rstr_type_out; 592 const bool *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL; 593 const CeedInt8 *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL; 594 CeedOperatorAssemblyData data; 595 596 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 597 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out, 598 &num_eval_modes_out, &eval_modes_out, NULL, NULL)); 599 600 CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED, 601 "Cannot assemble operator with multiple active bases"); 602 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); 603 604 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out)); 605 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 606 basis_in = active_bases_in[0]; 607 basis_out = active_bases_out[0]; 608 B_mat_in = B_mats_in[0]; 609 B_mat_out = B_mats_out[0]; 610 611 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 612 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 613 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 614 if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in; 615 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in)); 616 617 CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in)); 618 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 619 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in)); 620 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 621 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in)); 622 } 623 624 if (elem_rstr_in != elem_rstr_out) { 625 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 626 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 627 "Active input and output operator restrictions must have the same number of elements"); 628 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 629 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 630 if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out; 631 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out)); 632 CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED, 633 "Active input and output bases must have the same number of quadrature points"); 634 635 CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out)); 636 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 637 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out)); 638 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 639 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out)); 640 } 641 } else { 642 num_elem_out = num_elem_in; 643 elem_size_out = elem_size_in; 644 num_comp_out = num_comp_in; 645 num_qpts_out = num_qpts_in; 646 647 elem_rstr_orients_out = elem_rstr_orients_in; 648 elem_rstr_curl_orients_out = elem_rstr_curl_orients_in; 649 } 650 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 651 652 // Loop over elements and put in data structure 653 // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 654 CeedTensorContract contract; 655 CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL; 656 657 CeedCall(CeedBasisGetTensorContract(basis_in, &contract)); 658 CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat)); 659 CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat)); 660 if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b)); 661 662 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 663 for (CeedSize e = 0; e < num_elem_in; e++) { 664 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 665 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 666 // Compute B^T*D 667 for (CeedSize n = 0; n < elem_size_out; n++) { 668 for (CeedSize q = 0; q < num_qpts_in; q++) { 669 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 670 const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in; 671 CeedScalar sum = 0.0; 672 673 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 674 const CeedSize b_out_index = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n; 675 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out; 676 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 677 678 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 679 } 680 BTD_mat[btd_index] = sum; 681 } 682 } 683 } 684 685 // Form element matrix itself (for each block component) 686 if (contract) { 687 CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE, 688 false, B_mat_in, elem_mat)); 689 } else { 690 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0])); 691 } 692 693 // Transform the element matrix if required 694 if (elem_rstr_orients_out) { 695 const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out]; 696 697 for (CeedInt i = 0; i < elem_size_out; i++) { 698 const double orient = elem_orients[i] ? -1.0 : 1.0; 699 700 for (CeedInt j = 0; j < elem_size_in; j++) { 701 elem_mat[i * elem_size_in + j] *= orient; 702 } 703 } 704 } else if (elem_rstr_curl_orients_out) { 705 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out]; 706 707 // T^T*(B^T*D*B) 708 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 709 for (CeedInt i = 0; i < elem_size_out; i++) { 710 for (CeedInt j = 0; j < elem_size_in; j++) { 711 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] + 712 (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) + 713 (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0); 714 } 715 } 716 } 717 if (elem_rstr_orients_in) { 718 const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in]; 719 720 for (CeedInt i = 0; i < elem_size_out; i++) { 721 for (CeedInt j = 0; j < elem_size_in; j++) { 722 elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0; 723 } 724 } 725 } else if (elem_rstr_curl_orients_in) { 726 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in]; 727 728 // (B^T*D*B)*T 729 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 730 for (CeedInt i = 0; i < elem_size_out; i++) { 731 for (CeedInt j = 0; j < elem_size_in; j++) { 732 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] + 733 (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) + 734 (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0); 735 } 736 } 737 } 738 739 // Put element matrix in coordinate data structure 740 for (CeedInt i = 0; i < elem_size_out; i++) { 741 for (CeedInt j = 0; j < elem_size_in; j++) { 742 vals[offset + count] = elem_mat[i * elem_size_in + j]; 743 count++; 744 } 745 } 746 } 747 } 748 } 749 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 750 CeedCall(CeedVectorRestoreArray(values, &vals)); 751 752 // Cleanup 753 CeedCall(CeedFree(&BTD_mat)); 754 CeedCall(CeedFree(&elem_mat)); 755 CeedCall(CeedFree(&elem_mat_b)); 756 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 757 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in)); 758 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 759 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in)); 760 } 761 if (elem_rstr_in != elem_rstr_out) { 762 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 763 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out)); 764 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 765 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out)); 766 } 767 } 768 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 769 CeedCall(CeedVectorDestroy(&assembled_qf)); 770 return CEED_ERROR_SUCCESS; 771 } 772 773 /** 774 @brief Count number of entries for assembled `CeedOperator` 775 776 @param[in] op `CeedOperator` to assemble 777 @param[out] num_entries Number of entries in assembled representation 778 779 @return An error code: 0 - success, otherwise - failure 780 781 @ref Utility 782 **/ 783 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 784 bool is_composite; 785 CeedInt num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out; 786 Ceed ceed; 787 CeedElemRestriction rstr_in, rstr_out; 788 789 CeedCall(CeedOperatorGetCeed(op, &ceed)); 790 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 791 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 792 793 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out)); 794 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in)); 795 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in)); 796 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in)); 797 if (rstr_in != rstr_out) { 798 CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out)); 799 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 800 "Active input and output operator restrictions must have the same number of elements"); 801 CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out)); 802 CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out)); 803 } else { 804 num_elem_out = num_elem_in; 805 elem_size_out = elem_size_in; 806 num_comp_out = num_comp_in; 807 } 808 *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in; 809 return CEED_ERROR_SUCCESS; 810 } 811 812 /** 813 @brief Common code for creating a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` 814 815 @param[in] op_fine Fine grid `CeedOperator` 816 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator` 817 @param[in] rstr_coarse Coarse grid `CeedElemRestriction` 818 @param[in] basis_coarse Coarse grid active vector `CeedBasis` 819 @param[in] basis_c_to_f `CeedBasis` for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction operators 820 @param[out] op_coarse Coarse grid `CeedOperator` 821 @param[out] op_prolong Coarse to fine `CeedOperator`, or `NULL` 822 @param[out] op_restrict Fine to coarse `CeedOperator`, or `NULL` 823 824 @return An error code: 0 - success, otherwise - failure 825 826 @ref Developer 827 **/ 828 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 829 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 830 bool is_composite; 831 Ceed ceed; 832 CeedInt num_comp, num_input_fields, num_output_fields; 833 CeedVector mult_vec = NULL; 834 CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL; 835 CeedOperatorField *input_fields, *output_fields; 836 837 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 838 839 // Check for composite operator 840 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 841 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 842 843 // Coarse Grid 844 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 845 CeedCall(CeedOperatorGetFields(op_fine, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 846 // -- Clone input fields 847 for (CeedInt i = 0; i < num_input_fields; i++) { 848 const char *field_name; 849 CeedVector vec; 850 CeedElemRestriction rstr; 851 CeedBasis basis; 852 853 CeedCall(CeedOperatorFieldGetName(input_fields[i], &field_name)); 854 CeedCall(CeedOperatorFieldGetVector(input_fields[i], &vec)); 855 if (vec == CEED_VECTOR_ACTIVE) { 856 rstr = rstr_coarse; 857 basis = basis_coarse; 858 CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine)); 859 } else { 860 CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr)); 861 CeedCall(CeedOperatorFieldGetBasis(input_fields[i], &basis)); 862 } 863 CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec)); 864 } 865 // -- Clone output fields 866 for (CeedInt i = 0; i < num_output_fields; i++) { 867 const char *field_name; 868 CeedVector vec; 869 CeedElemRestriction rstr; 870 CeedBasis basis; 871 872 CeedCall(CeedOperatorFieldGetName(output_fields[i], &field_name)); 873 CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec)); 874 if (vec == CEED_VECTOR_ACTIVE) { 875 rstr = rstr_coarse; 876 basis = basis_coarse; 877 CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine)); 878 } else { 879 CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr)); 880 CeedCall(CeedOperatorFieldGetBasis(output_fields[i], &basis)); 881 } 882 CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec)); 883 } 884 // -- Clone QFunctionAssemblyData 885 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 886 887 // Multiplicity vector 888 if (op_restrict || op_prolong) { 889 CeedVector mult_e_vec; 890 CeedRestrictionType rstr_type; 891 892 CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type)); 893 CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED, 894 "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported"); 895 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 896 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 897 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 898 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 899 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 900 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 901 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 902 CeedCall(CeedVectorDestroy(&mult_e_vec)); 903 CeedCall(CeedVectorReciprocal(mult_vec)); 904 } 905 906 // Clone name 907 bool has_name = op_fine->name; 908 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 909 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 910 911 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 912 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 913 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 914 915 // Restriction/Prolongation Operators 916 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 917 918 // Restriction 919 if (op_restrict) { 920 CeedInt *num_comp_r_data; 921 CeedQFunctionContext ctx_r; 922 CeedQFunction qf_restrict; 923 924 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 925 CeedCall(CeedCalloc(1, &num_comp_r_data)); 926 num_comp_r_data[0] = num_comp; 927 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 928 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 929 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 930 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 931 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 932 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 933 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 934 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 935 936 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 937 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 938 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 939 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 940 941 // Set name 942 char *restriction_name; 943 944 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 945 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 946 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 947 CeedCall(CeedFree(&restriction_name)); 948 949 // Check 950 CeedCall(CeedOperatorCheckReady(*op_restrict)); 951 952 // Cleanup 953 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 954 } 955 956 // Prolongation 957 if (op_prolong) { 958 CeedInt *num_comp_p_data; 959 CeedQFunctionContext ctx_p; 960 CeedQFunction qf_prolong; 961 962 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 963 CeedCall(CeedCalloc(1, &num_comp_p_data)); 964 num_comp_p_data[0] = num_comp; 965 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 966 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 967 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 968 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 969 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 970 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 971 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 972 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 973 974 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 975 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 976 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 977 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 978 979 // Set name 980 char *prolongation_name; 981 982 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 983 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 984 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 985 CeedCall(CeedFree(&prolongation_name)); 986 987 // Check 988 CeedCall(CeedOperatorCheckReady(*op_prolong)); 989 990 // Cleanup 991 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 992 } 993 994 // Check 995 CeedCall(CeedOperatorCheckReady(*op_coarse)); 996 997 // Cleanup 998 CeedCall(CeedVectorDestroy(&mult_vec)); 999 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 1000 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 1001 return CEED_ERROR_SUCCESS; 1002 } 1003 1004 /** 1005 @brief Build 1D mass matrix and Laplacian with perturbation 1006 1007 @param[in] interp_1d Interpolation matrix in one dimension 1008 @param[in] grad_1d Gradient matrix in one dimension 1009 @param[in] q_weight_1d Quadrature weights in one dimension 1010 @param[in] P_1d Number of basis nodes in one dimension 1011 @param[in] Q_1d Number of quadrature points in one dimension 1012 @param[in] dim Dimension of basis 1013 @param[out] mass Assembled mass matrix in one dimension 1014 @param[out] laplace Assembled perturbed Laplacian in one dimension 1015 1016 @return An error code: 0 - success, otherwise - failure 1017 1018 @ref Developer 1019 **/ 1020 CeedPragmaOptimizeOff 1021 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 1022 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 1023 for (CeedInt i = 0; i < P_1d; i++) { 1024 for (CeedInt j = 0; j < P_1d; j++) { 1025 CeedScalar sum = 0.0; 1026 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 1027 mass[i + j * P_1d] = sum; 1028 } 1029 } 1030 // -- Laplacian 1031 for (CeedInt i = 0; i < P_1d; i++) { 1032 for (CeedInt j = 0; j < P_1d; j++) { 1033 CeedScalar sum = 0.0; 1034 1035 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 1036 laplace[i + j * P_1d] = sum; 1037 } 1038 } 1039 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 1040 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 1041 return CEED_ERROR_SUCCESS; 1042 } 1043 CeedPragmaOptimizeOn 1044 1045 /// @} 1046 1047 /// ---------------------------------------------------------------------------- 1048 /// CeedOperator Backend API 1049 /// ---------------------------------------------------------------------------- 1050 /// @addtogroup CeedOperatorBackend 1051 /// @{ 1052 1053 /** 1054 @brief Select correct basis matrix pointer based on @ref CeedEvalMode 1055 1056 @param[in] basis `CeedBasis` from which to get the basis matrix 1057 @param[in] eval_mode Current basis evaluation mode 1058 @param[in] identity Pointer to identity matrix 1059 @param[out] basis_ptr `CeedBasis` pointer to set 1060 1061 @ref Backend 1062 **/ 1063 int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 1064 switch (eval_mode) { 1065 case CEED_EVAL_NONE: 1066 *basis_ptr = identity; 1067 break; 1068 case CEED_EVAL_INTERP: 1069 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 1070 break; 1071 case CEED_EVAL_GRAD: 1072 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 1073 break; 1074 case CEED_EVAL_DIV: 1075 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 1076 break; 1077 case CEED_EVAL_CURL: 1078 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 1079 break; 1080 case CEED_EVAL_WEIGHT: 1081 break; // Caught by QF Assembly 1082 } 1083 assert(*basis_ptr != NULL); 1084 return CEED_ERROR_SUCCESS; 1085 } 1086 1087 /** 1088 @brief Create point block restriction for active `CeedOperatorField` 1089 1090 @param[in] rstr Original `CeedElemRestriction` for active field 1091 @param[out] point_block_rstr Address of the variable where the newly created `CeedElemRestriction` will be stored 1092 1093 @return An error code: 0 - success, otherwise - failure 1094 1095 @ref Backend 1096 **/ 1097 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) { 1098 Ceed ceed; 1099 CeedInt num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets; 1100 CeedSize l_size; 1101 const CeedInt *offsets; 1102 1103 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 1104 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 1105 1106 // Expand offsets 1107 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 1108 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 1109 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 1110 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 1111 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 1112 shift = num_comp; 1113 if (comp_stride != 1) shift *= num_comp; 1114 CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets)); 1115 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 1116 point_block_offsets[i] = offsets[i] * shift; 1117 } 1118 1119 // Create new restriction 1120 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 1121 point_block_offsets, point_block_rstr)); 1122 1123 // Cleanup 1124 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 1125 return CEED_ERROR_SUCCESS; 1126 } 1127 1128 /** 1129 @brief Create object holding `CeedQFunction` assembly data for `CeedOperator` 1130 1131 @param[in] ceed `Ceed` object used to create the `CeedQFunctionAssemblyData` 1132 @param[out] data Address of the variable where the newly created `CeedQFunctionAssemblyData` will be stored 1133 1134 @return An error code: 0 - success, otherwise - failure 1135 1136 @ref Backend 1137 **/ 1138 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 1139 CeedCall(CeedCalloc(1, data)); 1140 (*data)->ref_count = 1; 1141 (*data)->ceed = ceed; 1142 CeedCall(CeedReference(ceed)); 1143 return CEED_ERROR_SUCCESS; 1144 } 1145 1146 /** 1147 @brief Increment the reference counter for a `CeedQFunctionAssemblyData` 1148 1149 @param[in,out] data `CeedQFunctionAssemblyData` to increment the reference counter 1150 1151 @return An error code: 0 - success, otherwise - failure 1152 1153 @ref Backend 1154 **/ 1155 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1156 data->ref_count++; 1157 return CEED_ERROR_SUCCESS; 1158 } 1159 1160 /** 1161 @brief Set re-use of `CeedQFunctionAssemblyData` 1162 1163 @param[in,out] data `CeedQFunctionAssemblyData` to mark for reuse 1164 @param[in] reuse_data Boolean flag indicating data re-use 1165 1166 @return An error code: 0 - success, otherwise - failure 1167 1168 @ref Backend 1169 **/ 1170 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 1171 data->reuse_data = reuse_data; 1172 data->needs_data_update = true; 1173 return CEED_ERROR_SUCCESS; 1174 } 1175 1176 /** 1177 @brief Mark `CeedQFunctionAssemblyData` as stale 1178 1179 @param[in,out] data `CeedQFunctionAssemblyData` to mark as stale 1180 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 1181 1182 @return An error code: 0 - success, otherwise - failure 1183 1184 @ref Backend 1185 **/ 1186 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 1187 data->needs_data_update = needs_data_update; 1188 return CEED_ERROR_SUCCESS; 1189 } 1190 1191 /** 1192 @brief Determine if `CeedQFunctionAssemblyData` needs update 1193 1194 @param[in] data `CeedQFunctionAssemblyData` to mark as stale 1195 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1196 1197 @return An error code: 0 - success, otherwise - failure 1198 1199 @ref Backend 1200 **/ 1201 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1202 *is_update_needed = !data->reuse_data || data->needs_data_update; 1203 return CEED_ERROR_SUCCESS; 1204 } 1205 1206 /** 1207 @brief Copy the pointer to a `CeedQFunctionAssemblyData`. 1208 1209 Both pointers should be destroyed with @ref CeedQFunctionAssemblyDataDestroy(). 1210 1211 Note: If the value of ` *data_copy` passed to this function is non-`NULL` , then it is assumed that ` *data_copy` is a pointer to a `CeedQFunctionAssemblyData`. 1212 This `CeedQFunctionAssemblyData` will be destroyed if ` *data_copy` is the only reference to this `CeedQFunctionAssemblyData`. 1213 1214 @param[in] data `CeedQFunctionAssemblyData` to copy reference to 1215 @param[in,out] data_copy Variable to store copied reference 1216 1217 @return An error code: 0 - success, otherwise - failure 1218 1219 @ref Backend 1220 **/ 1221 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1222 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1223 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1224 *data_copy = data; 1225 return CEED_ERROR_SUCCESS; 1226 } 1227 1228 /** 1229 @brief Get setup status for internal objects for `CeedQFunctionAssemblyData` 1230 1231 @param[in] data `CeedQFunctionAssemblyData` to retrieve status 1232 @param[out] is_setup Boolean flag for setup status 1233 1234 @return An error code: 0 - success, otherwise - failure 1235 1236 @ref Backend 1237 **/ 1238 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1239 *is_setup = data->is_setup; 1240 return CEED_ERROR_SUCCESS; 1241 } 1242 1243 /** 1244 @brief Set internal objects for `CeedQFunctionAssemblyData` 1245 1246 @param[in,out] data `CeedQFunctionAssemblyData` to set objects 1247 @param[in] vec `CeedVector` to store assembled `CeedQFunction` at quadrature points 1248 @param[in] rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction` 1249 1250 @return An error code: 0 - success, otherwise - failure 1251 1252 @ref Backend 1253 **/ 1254 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1255 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1256 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1257 1258 data->is_setup = true; 1259 return CEED_ERROR_SUCCESS; 1260 } 1261 1262 /** 1263 @brief Get internal objects for `CeedQFunctionAssemblyData` 1264 1265 @param[in,out] data `CeedQFunctionAssemblyData` to set objects 1266 @param[out] vec `CeedVector` to store assembled `CeedQFunction` at quadrature points 1267 @param[out] rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction` 1268 1269 @return An error code: 0 - success, otherwise - failure 1270 1271 @ref Backend 1272 **/ 1273 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1274 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1275 1276 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1277 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1278 return CEED_ERROR_SUCCESS; 1279 } 1280 1281 /** 1282 @brief Destroy `CeedQFunctionAssemblyData` 1283 1284 @param[in,out] data `CeedQFunctionAssemblyData` to destroy 1285 1286 @return An error code: 0 - success, otherwise - failure 1287 1288 @ref Backend 1289 **/ 1290 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1291 if (!*data || --(*data)->ref_count > 0) { 1292 *data = NULL; 1293 return CEED_ERROR_SUCCESS; 1294 } 1295 CeedCall(CeedDestroy(&(*data)->ceed)); 1296 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1297 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1298 1299 CeedCall(CeedFree(data)); 1300 return CEED_ERROR_SUCCESS; 1301 } 1302 1303 /** 1304 @brief Get `CeedOperatorAssemblyData` 1305 1306 @param[in] op `CeedOperator` to assemble 1307 @param[out] data `CeedQFunctionAssemblyData` 1308 1309 @return An error code: 0 - success, otherwise - failure 1310 1311 @ref Backend 1312 **/ 1313 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1314 if (!op->op_assembled) { 1315 CeedOperatorAssemblyData data; 1316 1317 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1318 op->op_assembled = data; 1319 } 1320 *data = op->op_assembled; 1321 return CEED_ERROR_SUCCESS; 1322 } 1323 1324 /** 1325 @brief Create object holding `CeedOperator` assembly data. 1326 1327 The `CeedOperatorAssemblyData` holds an array with references to every active `CeedBasis` used in the `CeedOperator`. 1328 An array with references to the corresponding active `CeedElemRestriction` is also stored. 1329 For each active `CeedBasis, the `CeedOperatorAssemblyData` holds an array of all input and output @ref CeedEvalMode for this `CeedBasis`. 1330 The `CeedOperatorAssemblyData` holds an array of offsets for indexing into the assembled `CeedQFunction` arrays to the row representing each @ref CeedEvalMode. 1331 The number of input columns across all active bases for the assembled `CeedQFunction` is also stored. 1332 Lastly, the `CeedOperatorAssembly` data holds assembled matrices representing the full action of the `CeedBasis` for all @ref CeedEvalMode. 1333 1334 @param[in] ceed `Ceed` object used to create the `CeedOperatorAssemblyData` 1335 @param[in] op `CeedOperator` to be assembled 1336 @param[out] data Address of the variable where the newly created `CeedOperatorAssemblyData` will be stored 1337 1338 @return An error code: 0 - success, otherwise - failure 1339 1340 @ref Backend 1341 **/ 1342 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1343 CeedInt num_active_bases_in = 0, num_active_bases_out = 0, offset = 0; 1344 CeedInt num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL; 1345 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1346 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1347 CeedQFunctionField *qf_fields; 1348 CeedQFunction qf; 1349 CeedOperatorField *op_fields; 1350 bool is_composite; 1351 1352 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1353 CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators."); 1354 1355 // Allocate 1356 CeedCall(CeedCalloc(1, data)); 1357 (*data)->ceed = ceed; 1358 CeedCall(CeedReference(ceed)); 1359 1360 // Build OperatorAssembly data 1361 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1362 1363 // Determine active input basis 1364 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1365 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1366 for (CeedInt i = 0; i < num_input_fields; i++) { 1367 CeedVector vec; 1368 1369 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1370 if (vec == CEED_VECTOR_ACTIVE) { 1371 CeedInt index = -1, num_comp, q_comp; 1372 CeedEvalMode eval_mode; 1373 CeedBasis basis_in = NULL; 1374 1375 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1376 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1377 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1378 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1379 for (CeedInt i = 0; i < num_active_bases_in; i++) { 1380 if ((*data)->active_bases_in[i] == basis_in) index = i; 1381 } 1382 if (index == -1) { 1383 CeedElemRestriction elem_rstr_in; 1384 1385 index = num_active_bases_in; 1386 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in)); 1387 (*data)->active_bases_in[num_active_bases_in] = NULL; 1388 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in])); 1389 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in)); 1390 (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL; 1391 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1392 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in])); 1393 CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in)); 1394 num_eval_modes_in[index] = 0; 1395 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in)); 1396 eval_modes_in[index] = NULL; 1397 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in)); 1398 eval_mode_offsets_in[index] = NULL; 1399 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in)); 1400 (*data)->assembled_bases_in[index] = NULL; 1401 num_active_bases_in++; 1402 } 1403 if (eval_mode != CEED_EVAL_WEIGHT) { 1404 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1405 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1406 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1407 for (CeedInt d = 0; d < q_comp; d++) { 1408 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1409 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1410 offset += num_comp; 1411 } 1412 num_eval_modes_in[index] += q_comp; 1413 } 1414 } 1415 } 1416 1417 // Determine active output basis 1418 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1419 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1420 offset = 0; 1421 for (CeedInt i = 0; i < num_output_fields; i++) { 1422 CeedVector vec; 1423 1424 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1425 if (vec == CEED_VECTOR_ACTIVE) { 1426 CeedInt index = -1, num_comp, q_comp; 1427 CeedEvalMode eval_mode; 1428 CeedBasis basis_out = NULL; 1429 1430 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1431 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1432 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1433 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1434 for (CeedInt i = 0; i < num_active_bases_out; i++) { 1435 if ((*data)->active_bases_out[i] == basis_out) index = i; 1436 } 1437 if (index == -1) { 1438 CeedElemRestriction elem_rstr_out; 1439 1440 index = num_active_bases_out; 1441 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out)); 1442 (*data)->active_bases_out[num_active_bases_out] = NULL; 1443 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out])); 1444 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out)); 1445 (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL; 1446 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1447 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out])); 1448 CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out)); 1449 num_eval_modes_out[index] = 0; 1450 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out)); 1451 eval_modes_out[index] = NULL; 1452 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out)); 1453 eval_mode_offsets_out[index] = NULL; 1454 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out)); 1455 (*data)->assembled_bases_out[index] = NULL; 1456 num_active_bases_out++; 1457 } 1458 if (eval_mode != CEED_EVAL_WEIGHT) { 1459 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1460 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1461 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1462 for (CeedInt d = 0; d < q_comp; d++) { 1463 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1464 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1465 offset += num_comp; 1466 } 1467 num_eval_modes_out[index] += q_comp; 1468 } 1469 } 1470 } 1471 (*data)->num_active_bases_in = num_active_bases_in; 1472 (*data)->num_eval_modes_in = num_eval_modes_in; 1473 (*data)->eval_modes_in = eval_modes_in; 1474 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1475 (*data)->num_active_bases_out = num_active_bases_out; 1476 (*data)->num_eval_modes_out = num_eval_modes_out; 1477 (*data)->eval_modes_out = eval_modes_out; 1478 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1479 (*data)->num_output_components = offset; 1480 return CEED_ERROR_SUCCESS; 1481 } 1482 1483 /** 1484 @brief Get `CeedOperator` @ref CeedEvalMode for assembly. 1485 1486 Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object. 1487 1488 @param[in] data `CeedOperatorAssemblyData` 1489 @param[out] num_active_bases_in Total number of active bases for input 1490 @param[out] num_eval_modes_in Pointer to hold array of numbers of input @ref CeedEvalMode, or `NULL`. 1491 `eval_modes_in[0]` holds an array of eval modes for the first active `CeedBasis`. 1492 @param[out] eval_modes_in Pointer to hold arrays of input @ref CeedEvalMode, or `NULL` 1493 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point 1494 @param[out] num_active_bases_out Total number of active bases for output 1495 @param[out] num_eval_modes_out Pointer to hold array of numbers of output @ref CeedEvalMode, or `NULL` 1496 @param[out] eval_modes_out Pointer to hold arrays of output @ref CeedEvalMode, or `NULL` 1497 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1498 @param[out] num_output_components The number of columns in the assembled `CeedQFunction` matrix for each quadrature point, including contributions of all active bases 1499 1500 @return An error code: 0 - success, otherwise - failure 1501 1502 @ref Backend 1503 **/ 1504 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in, 1505 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out, 1506 CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, 1507 CeedSize *num_output_components) { 1508 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1509 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1510 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1511 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1512 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1513 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1514 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1515 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1516 if (num_output_components) *num_output_components = data->num_output_components; 1517 return CEED_ERROR_SUCCESS; 1518 } 1519 1520 /** 1521 @brief Get `CeedOperator` `CeedBasis` data for assembly. 1522 1523 Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object. 1524 1525 @param[in] data `CeedOperatorAssemblyData` 1526 @param[out] num_active_bases_in Number of active input bases, or `NULL` 1527 @param[out] active_bases_in Pointer to hold active input `CeedBasis`, or `NULL` 1528 @param[out] assembled_bases_in Pointer to hold assembled active input `B` , or `NULL` 1529 @param[out] num_active_bases_out Number of active output bases, or `NULL` 1530 @param[out] active_bases_out Pointer to hold active output `CeedBasis`, or `NULL` 1531 @param[out] assembled_bases_out Pointer to hold assembled active output `B` , or `NULL` 1532 1533 @return An error code: 0 - success, otherwise - failure 1534 1535 @ref Backend 1536 **/ 1537 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in, 1538 const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out, 1539 const CeedScalar ***assembled_bases_out) { 1540 // Assemble B_in, B_out if needed 1541 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1542 CeedInt num_qpts; 1543 1544 if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts)); 1545 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts)); 1546 for (CeedInt b = 0; b < data->num_active_bases_in; b++) { 1547 bool has_eval_none = false; 1548 CeedInt num_nodes; 1549 CeedScalar *B_in = NULL, *identity = NULL; 1550 1551 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes)); 1552 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1553 1554 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1555 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1556 } 1557 if (has_eval_none) { 1558 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1559 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1560 identity[i * num_nodes + i] = 1.0; 1561 } 1562 } 1563 1564 for (CeedInt q = 0; q < num_qpts; q++) { 1565 for (CeedInt n = 0; n < num_nodes; n++) { 1566 CeedInt d_in = 0, q_comp_in; 1567 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1568 1569 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1570 const CeedInt qq = data->num_eval_modes_in[b] * q; 1571 const CeedScalar *B = NULL; 1572 1573 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B)); 1574 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1575 if (q_comp_in > 1) { 1576 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1577 else B = &B[(++d_in) * num_qpts * num_nodes]; 1578 } 1579 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1580 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1581 } 1582 } 1583 } 1584 if (identity) CeedCall(CeedFree(&identity)); 1585 data->assembled_bases_in[b] = B_in; 1586 } 1587 } 1588 1589 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1590 CeedInt num_qpts; 1591 1592 if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts)); 1593 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts)); 1594 for (CeedInt b = 0; b < data->num_active_bases_out; b++) { 1595 bool has_eval_none = false; 1596 CeedInt num_nodes; 1597 CeedScalar *B_out = NULL, *identity = NULL; 1598 1599 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes)); 1600 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1601 1602 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1603 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1604 } 1605 if (has_eval_none) { 1606 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1607 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1608 identity[i * num_nodes + i] = 1.0; 1609 } 1610 } 1611 1612 for (CeedInt q = 0; q < num_qpts; q++) { 1613 for (CeedInt n = 0; n < num_nodes; n++) { 1614 CeedInt d_out = 0, q_comp_out; 1615 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1616 1617 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1618 const CeedInt qq = data->num_eval_modes_out[b] * q; 1619 const CeedScalar *B = NULL; 1620 1621 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B)); 1622 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1623 if (q_comp_out > 1) { 1624 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1625 else B = &B[(++d_out) * num_qpts * num_nodes]; 1626 } 1627 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1628 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1629 } 1630 } 1631 } 1632 if (identity) CeedCall(CeedFree(&identity)); 1633 data->assembled_bases_out[b] = B_out; 1634 } 1635 } 1636 1637 // Pass out assembled data 1638 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1639 if (active_bases_in) *active_bases_in = data->active_bases_in; 1640 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1641 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1642 if (active_bases_out) *active_bases_out = data->active_bases_out; 1643 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1644 return CEED_ERROR_SUCCESS; 1645 } 1646 1647 /** 1648 @brief Get `CeedOperator` `CeedBasis` data for assembly. 1649 1650 Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object. 1651 1652 @param[in] data `CeedOperatorAssemblyData` 1653 @param[out] num_active_elem_rstrs_in Number of active input element restrictions, or `NULL` 1654 @param[out] active_elem_rstrs_in Pointer to hold active input `CeedElemRestriction`, or `NULL` 1655 @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or `NULL` 1656 @param[out] active_elem_rstrs_out Pointer to hold active output `CeedElemRestriction`, or `NULL` 1657 1658 @return An error code: 0 - success, otherwise - failure 1659 1660 @ref Backend 1661 **/ 1662 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in, 1663 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out, 1664 CeedElemRestriction **active_elem_rstrs_out) { 1665 if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in; 1666 if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in; 1667 if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out; 1668 if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out; 1669 return CEED_ERROR_SUCCESS; 1670 } 1671 1672 /** 1673 @brief Destroy `CeedOperatorAssemblyData` 1674 1675 @param[in,out] data `CeedOperatorAssemblyData` to destroy 1676 1677 @return An error code: 0 - success, otherwise - failure 1678 1679 @ref Backend 1680 **/ 1681 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1682 if (!*data) { 1683 *data = NULL; 1684 return CEED_ERROR_SUCCESS; 1685 } 1686 CeedCall(CeedDestroy(&(*data)->ceed)); 1687 for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) { 1688 CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b])); 1689 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b])); 1690 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1691 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1692 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1693 } 1694 for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) { 1695 CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b])); 1696 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b])); 1697 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1698 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1699 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1700 } 1701 CeedCall(CeedFree(&(*data)->active_bases_in)); 1702 CeedCall(CeedFree(&(*data)->active_bases_out)); 1703 CeedCall(CeedFree(&(*data)->active_elem_rstrs_in)); 1704 CeedCall(CeedFree(&(*data)->active_elem_rstrs_out)); 1705 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1706 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1707 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1708 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1709 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1710 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1711 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1712 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1713 1714 CeedCall(CeedFree(data)); 1715 return CEED_ERROR_SUCCESS; 1716 } 1717 1718 /** 1719 @brief Retrieve fallback `CeedOperator` with a reference `Ceed` for advanced `CeedOperator` functionality 1720 1721 @param[in] op `CeedOperator` to retrieve fallback for 1722 @param[out] op_fallback Fallback `CeedOperator` 1723 1724 @return An error code: 0 - success, otherwise - failure 1725 1726 @ref Backend 1727 **/ 1728 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 1729 // Create if needed 1730 if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op)); 1731 if (op->op_fallback) { 1732 bool is_debug; 1733 Ceed ceed; 1734 1735 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1736 CeedCall(CeedIsDebug(ceed, &is_debug)); 1737 if (is_debug) { 1738 Ceed ceed_fallback; 1739 const char *resource, *resource_fallback; 1740 1741 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 1742 CeedCall(CeedGetResource(ceed, &resource)); 1743 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 1744 1745 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 1746 CeedDebug(ceed, "Falling back from %s operator at address %p to %s operator at address %p\n", resource, op, resource_fallback, op->op_fallback); 1747 } 1748 } 1749 *op_fallback = op->op_fallback; 1750 return CEED_ERROR_SUCCESS; 1751 } 1752 1753 /** 1754 @brief Get the parent `CeedOperator` for a fallback `CeedOperator` 1755 1756 @param[in] op `CeedOperator` context 1757 @param[out] parent Variable to store parent `CeedOperator` context 1758 1759 @return An error code: 0 - success, otherwise - failure 1760 1761 @ref Backend 1762 **/ 1763 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) { 1764 *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL; 1765 return CEED_ERROR_SUCCESS; 1766 } 1767 1768 /** 1769 @brief Get the `Ceed` context of the parent `CeedOperator` for a fallback `CeedOperator` 1770 1771 @param[in] op `CeedOperator` context 1772 @param[out] parent Variable to store parent `Ceed` context 1773 1774 @return An error code: 0 - success, otherwise - failure 1775 1776 @ref Backend 1777 **/ 1778 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) { 1779 *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed; 1780 return CEED_ERROR_SUCCESS; 1781 } 1782 1783 /// @} 1784 1785 /// ---------------------------------------------------------------------------- 1786 /// CeedOperator Public API 1787 /// ---------------------------------------------------------------------------- 1788 /// @addtogroup CeedOperatorUser 1789 /// @{ 1790 1791 /** 1792 @brief Assemble a linear `CeedQFunction` associated with a `CeedOperator`. 1793 1794 This returns a `CeedVector` containing a matrix at each quadrature point providing the action of the `CeedQFunction` associated with the `CeedOperator`. 1795 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices representing the action of the `CeedQFunction` for a corresponding quadrature point on an element. 1796 1797 Inputs and outputs are in the order provided by the user when adding `CeedOperator` fields. 1798 For example, a `CeedQFunction` with inputs `u` and `gradu` and outputs `gradv` and `v` , provided in that order, would result in an assembled `CeedQFunction` that consists of `(1 + dim) x (dim + 1)` matrices at each quadrature point acting on the input ` [u, du_0, du_1]` and producing the output `[dv_0, dv_1, v]`. 1799 1800 Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable. 1801 1802 @param[in] op `CeedOperator` to assemble `CeedQFunction` 1803 @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points 1804 @param[out] rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction` 1805 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1806 1807 @return An error code: 0 - success, otherwise - failure 1808 1809 @ref User 1810 **/ 1811 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1812 CeedCall(CeedOperatorCheckReady(op)); 1813 1814 if (op->LinearAssembleQFunction) { 1815 // Backend version 1816 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1817 } else { 1818 // Operator fallback 1819 Ceed ceed; 1820 CeedOperator op_fallback; 1821 1822 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1823 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1824 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1825 else return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1826 } 1827 return CEED_ERROR_SUCCESS; 1828 } 1829 1830 /** 1831 @brief Assemble `CeedQFunction` and store result internally. 1832 1833 Return copied references of stored data to the caller. 1834 Caller is responsible for ownership and destruction of the copied references. 1835 See also @ref CeedOperatorLinearAssembleQFunction(). 1836 1837 Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers. 1838 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1839 1840 @param[in] op `CeedOperator` to assemble `CeedQFunction` 1841 @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points 1842 @param[out] rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction` 1843 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1844 1845 @return An error code: 0 - success, otherwise - failure 1846 1847 @ref User 1848 **/ 1849 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1850 int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL; 1851 CeedOperator op_assemble = NULL; 1852 CeedOperator op_fallback_parent = NULL; 1853 1854 CeedCall(CeedOperatorCheckReady(op)); 1855 1856 // Determine if fallback parent or operator has implementation 1857 CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent)); 1858 if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) { 1859 // -- Backend version for op fallback parent is faster, if it exists 1860 LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate; 1861 op_assemble = op_fallback_parent; 1862 } else if (op->LinearAssembleQFunctionUpdate) { 1863 // -- Backend version for op 1864 LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate; 1865 op_assemble = op; 1866 } 1867 1868 // Assemble QFunction 1869 if (LinearAssembleQFunctionUpdate) { 1870 // Backend or fallback parent version 1871 bool qf_assembled_is_setup; 1872 CeedVector assembled_vec = NULL; 1873 CeedElemRestriction assembled_rstr = NULL; 1874 1875 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1876 if (qf_assembled_is_setup) { 1877 bool update_needed; 1878 1879 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1880 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1881 if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request)); 1882 } else { 1883 CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request)); 1884 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1885 } 1886 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1887 1888 // Copy reference from internally held copy 1889 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1890 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1891 CeedCall(CeedVectorDestroy(&assembled_vec)); 1892 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1893 } else { 1894 // Operator fallback 1895 Ceed ceed; 1896 CeedOperator op_fallback; 1897 1898 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1899 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1900 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1901 else return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1902 } 1903 return CEED_ERROR_SUCCESS; 1904 } 1905 1906 /** 1907 @brief Assemble the diagonal of a square linear `CeedOperator` 1908 1909 This overwrites a `CeedVector` with the diagonal of a linear `CeedOperator`. 1910 1911 Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported. 1912 1913 Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable. 1914 1915 @param[in] op `CeedOperator` to assemble `CeedQFunction` 1916 @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal 1917 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1918 1919 @return An error code: 0 - success, otherwise - failure 1920 1921 @ref User 1922 **/ 1923 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1924 bool is_composite; 1925 CeedSize input_size = 0, output_size = 0; 1926 Ceed ceed; 1927 1928 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1929 CeedCall(CeedOperatorCheckReady(op)); 1930 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1931 1932 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1933 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1934 1935 // Early exit for empty operator 1936 if (!is_composite) { 1937 CeedInt num_elem = 0; 1938 1939 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1940 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1941 } 1942 1943 if (op->LinearAssembleDiagonal) { 1944 // Backend version 1945 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1946 return CEED_ERROR_SUCCESS; 1947 } else if (op->LinearAssembleAddDiagonal) { 1948 // Backend version with zeroing first 1949 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1950 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1951 return CEED_ERROR_SUCCESS; 1952 } else { 1953 // Operator fallback 1954 CeedOperator op_fallback; 1955 1956 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1957 if (op_fallback) { 1958 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1959 return CEED_ERROR_SUCCESS; 1960 } 1961 } 1962 // Default interface implementation 1963 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1964 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1965 return CEED_ERROR_SUCCESS; 1966 } 1967 1968 /** 1969 @brief Assemble the diagonal of a square linear `CeedOperator`. 1970 1971 This sums into a `CeedVector` the diagonal of a linear `CeedOperator`. 1972 1973 Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported. 1974 1975 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1976 1977 @param[in] op `CeedOperator` to assemble `CeedQFunction` 1978 @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal 1979 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1980 1981 @return An error code: 0 - success, otherwise - failure 1982 1983 @ref User 1984 **/ 1985 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1986 bool is_composite; 1987 CeedSize input_size = 0, output_size = 0; 1988 Ceed ceed; 1989 1990 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1991 CeedCall(CeedOperatorCheckReady(op)); 1992 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1993 1994 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1995 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1996 1997 // Early exit for empty operator 1998 if (!is_composite) { 1999 CeedInt num_elem = 0; 2000 2001 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2002 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2003 } 2004 2005 if (op->LinearAssembleAddDiagonal) { 2006 // Backend version 2007 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 2008 return CEED_ERROR_SUCCESS; 2009 } else { 2010 // Operator fallback 2011 CeedOperator op_fallback; 2012 2013 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2014 if (op_fallback) { 2015 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 2016 return CEED_ERROR_SUCCESS; 2017 } 2018 } 2019 // Default interface implementation 2020 if (is_composite) { 2021 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 2022 } else { 2023 CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 2024 } 2025 return CEED_ERROR_SUCCESS; 2026 } 2027 2028 /** 2029 @brief Fully assemble the point-block diagonal pattern of a linear `CeedOperator`. 2030 2031 Expected to be used in conjunction with @ref CeedOperatorLinearAssemblePointBlockDiagonal(). 2032 2033 The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`. 2034 Note that the `(i, j)` pairs are unique. 2035 This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in the same ordering. 2036 2037 This will generally be slow unless your operator is low-order. 2038 2039 Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable. 2040 2041 @param[in] op `CeedOperator` to assemble 2042 @param[out] num_entries Number of entries in coordinate nonzero pattern 2043 @param[out] rows Row number for each entry 2044 @param[out] cols Column number for each entry 2045 2046 @ref User 2047 **/ 2048 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 2049 Ceed ceed; 2050 bool is_composite; 2051 CeedInt num_active_components, num_sub_operators; 2052 CeedOperator *sub_operators; 2053 2054 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2055 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2056 2057 CeedSize input_size = 0, output_size = 0; 2058 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2059 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2060 2061 if (is_composite) { 2062 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators)); 2063 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2064 } else { 2065 sub_operators = &op; 2066 num_sub_operators = 1; 2067 } 2068 2069 // Verify operator can be assembled correctly 2070 { 2071 CeedOperatorAssemblyData data; 2072 CeedInt num_active_elem_rstrs, comp_stride; 2073 CeedElemRestriction *active_elem_rstrs; 2074 2075 // Get initial values to check against 2076 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data)); 2077 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 2078 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride)); 2079 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components)); 2080 2081 // Verify that all active element restrictions have same component stride and number of components 2082 for (CeedInt k = 0; k < num_sub_operators; k++) { 2083 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data)); 2084 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 2085 for (CeedInt i = 0; i < num_active_elem_rstrs; i++) { 2086 CeedInt comp_stride_sub, num_active_components_sub; 2087 2088 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub)); 2089 CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION, 2090 "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub); 2091 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub)); 2092 CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE, 2093 "All suboperators must have the same number of output components"); 2094 } 2095 } 2096 } 2097 *num_entries = input_size * num_active_components; 2098 CeedCall(CeedCalloc(*num_entries, rows)); 2099 CeedCall(CeedCalloc(*num_entries, cols)); 2100 2101 for (CeedInt o = 0; o < num_sub_operators; o++) { 2102 CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr; 2103 CeedInt comp_stride, num_elem, elem_size; 2104 const CeedInt *offsets, *point_block_offsets; 2105 2106 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr)); 2107 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride)); 2108 CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem)); 2109 CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size)); 2110 CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets)); 2111 2112 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr)); 2113 CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets)); 2114 2115 for (CeedSize i = 0; i < num_elem * elem_size; i++) { 2116 for (CeedInt c_out = 0; c_out < num_active_components; c_out++) { 2117 for (CeedInt c_in = 0; c_in < num_active_components; c_in++) { 2118 (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride; 2119 (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride; 2120 } 2121 } 2122 } 2123 2124 CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets)); 2125 CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets)); 2126 CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr)); 2127 } 2128 return CEED_ERROR_SUCCESS; 2129 } 2130 2131 /** 2132 @brief Assemble the point block diagonal of a square linear `CeedOperator`. 2133 2134 This overwrites a `CeedVector` with the point block diagonal of a linear `CeedOperator`. 2135 2136 Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported. 2137 2138 Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable. 2139 2140 @param[in] op `CeedOperator` to assemble `CeedQFunction` 2141 @param[out] assembled `CeedVector` to store assembled `CeedOperator` point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node. 2142 The dimensions of this vector are derived from the active vector for the `CeedOperator`. 2143 The array has shape `[nodes, component out, component in]`. 2144 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2145 2146 @return An error code: 0 - success, otherwise - failure 2147 2148 @ref User 2149 **/ 2150 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2151 bool is_composite; 2152 CeedSize input_size = 0, output_size = 0; 2153 Ceed ceed; 2154 2155 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2156 CeedCall(CeedOperatorCheckReady(op)); 2157 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2158 2159 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2160 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2161 2162 // Early exit for empty operator 2163 if (!is_composite) { 2164 CeedInt num_elem = 0; 2165 2166 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2167 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2168 } 2169 2170 if (op->LinearAssemblePointBlockDiagonal) { 2171 // Backend version 2172 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 2173 return CEED_ERROR_SUCCESS; 2174 } else if (op->LinearAssembleAddPointBlockDiagonal) { 2175 // Backend version with zeroing first 2176 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2177 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2178 return CEED_ERROR_SUCCESS; 2179 } else { 2180 // Operator fallback 2181 CeedOperator op_fallback; 2182 2183 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2184 if (op_fallback) { 2185 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 2186 return CEED_ERROR_SUCCESS; 2187 } 2188 } 2189 // Default interface implementation 2190 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2191 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2192 return CEED_ERROR_SUCCESS; 2193 } 2194 2195 /** 2196 @brief Assemble the point block diagonal of a square linear `CeedOperator`. 2197 2198 This sums into a `CeedVector` with the point block diagonal of a linear `CeedOperator`. 2199 2200 Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported. 2201 2202 Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable. 2203 2204 @param[in] op `CeedOperator` to assemble `CeedQFunction` 2205 @param[out] assembled `CeedVector` to store assembled CeedOperator point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node. 2206 The dimensions of this vector are derived from the active vector for the `CeedOperator`. 2207 The array has shape `[nodes, component out, component in]`. 2208 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2209 2210 @return An error code: 0 - success, otherwise - failure 2211 2212 @ref User 2213 **/ 2214 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2215 bool is_composite; 2216 CeedSize input_size = 0, output_size = 0; 2217 Ceed ceed; 2218 2219 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2220 CeedCall(CeedOperatorCheckReady(op)); 2221 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2222 2223 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2224 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2225 2226 // Early exit for empty operator 2227 if (!is_composite) { 2228 CeedInt num_elem = 0; 2229 2230 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2231 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2232 } 2233 2234 if (op->LinearAssembleAddPointBlockDiagonal) { 2235 // Backend version 2236 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2237 return CEED_ERROR_SUCCESS; 2238 } else { 2239 // Operator fallback 2240 CeedOperator op_fallback; 2241 2242 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2243 if (op_fallback) { 2244 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 2245 return CEED_ERROR_SUCCESS; 2246 } 2247 } 2248 // Default interface implementation 2249 if (is_composite) { 2250 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 2251 } else { 2252 CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 2253 } 2254 return CEED_ERROR_SUCCESS; 2255 } 2256 2257 /** 2258 @brief Fully assemble the nonzero pattern of a linear `CeedOperator`. 2259 2260 Expected to be used in conjunction with @ref CeedOperatorLinearAssemble(). 2261 2262 The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`. 2263 Note that the `(i, j)` pairs are not unique and may repeat. 2264 This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemble() provides the values in the same ordering. 2265 2266 This will generally be slow unless your operator is low-order. 2267 2268 Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable. 2269 2270 @param[in] op `CeedOperator` to assemble 2271 @param[out] num_entries Number of entries in coordinate nonzero pattern 2272 @param[out] rows Row number for each entry 2273 @param[out] cols Column number for each entry 2274 2275 @ref User 2276 **/ 2277 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 2278 bool is_composite; 2279 CeedInt num_suboperators, offset = 0; 2280 CeedSize single_entries; 2281 CeedOperator *sub_operators; 2282 2283 CeedCall(CeedOperatorCheckReady(op)); 2284 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2285 2286 if (op->LinearAssembleSymbolic) { 2287 // Backend version 2288 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 2289 return CEED_ERROR_SUCCESS; 2290 } else { 2291 // Operator fallback 2292 CeedOperator op_fallback; 2293 2294 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2295 if (op_fallback) { 2296 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 2297 return CEED_ERROR_SUCCESS; 2298 } 2299 } 2300 2301 // Default interface implementation 2302 2303 // Count entries and allocate rows, cols arrays 2304 *num_entries = 0; 2305 if (is_composite) { 2306 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2307 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2308 for (CeedInt k = 0; k < num_suboperators; ++k) { 2309 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2310 *num_entries += single_entries; 2311 } 2312 } else { 2313 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 2314 *num_entries += single_entries; 2315 } 2316 CeedCall(CeedCalloc(*num_entries, rows)); 2317 CeedCall(CeedCalloc(*num_entries, cols)); 2318 2319 // Assemble nonzero locations 2320 if (is_composite) { 2321 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2322 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2323 for (CeedInt k = 0; k < num_suboperators; ++k) { 2324 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 2325 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2326 offset += single_entries; 2327 } 2328 } else { 2329 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 2330 } 2331 return CEED_ERROR_SUCCESS; 2332 } 2333 2334 /** 2335 @brief Fully assemble the nonzero entries of a linear operator. 2336 2337 Expected to be used in conjunction with @ref CeedOperatorLinearAssembleSymbolic(). 2338 2339 The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`. 2340 Note that the `(i, j)` pairs are not unique and may repeat. 2341 This function returns the values of the nonzero entries to be added, their `(i, j)` locations are provided by @ref CeedOperatorLinearAssembleSymbolic(). 2342 2343 This will generally be slow unless your operator is low-order. 2344 2345 Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable. 2346 2347 @param[in] op `CeedOperator` to assemble 2348 @param[out] values Values to assemble into matrix 2349 2350 @ref User 2351 **/ 2352 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 2353 bool is_composite; 2354 CeedInt num_suboperators, offset = 0; 2355 CeedSize single_entries = 0; 2356 CeedOperator *sub_operators; 2357 2358 CeedCall(CeedOperatorCheckReady(op)); 2359 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2360 2361 // Early exit for empty operator 2362 if (!is_composite) { 2363 CeedInt num_elem = 0; 2364 2365 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2366 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2367 } 2368 2369 if (op->LinearAssemble) { 2370 // Backend version 2371 CeedCall(op->LinearAssemble(op, values)); 2372 return CEED_ERROR_SUCCESS; 2373 } else { 2374 // Operator fallback 2375 CeedOperator op_fallback; 2376 2377 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2378 if (op_fallback) { 2379 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2380 return CEED_ERROR_SUCCESS; 2381 } 2382 } 2383 2384 // Default interface implementation 2385 CeedCall(CeedVectorSetValue(values, 0.0)); 2386 if (is_composite) { 2387 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2388 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2389 for (CeedInt k = 0; k < num_suboperators; k++) { 2390 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2391 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2392 offset += single_entries; 2393 } 2394 } else { 2395 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2396 } 2397 return CEED_ERROR_SUCCESS; 2398 } 2399 2400 /** 2401 @brief Get the multiplicity of nodes across sub-operators in a composite `CeedOperator`. 2402 2403 Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable. 2404 2405 @param[in] op Composite `CeedOperator` 2406 @param[in] num_skip_indices Number of sub-operators to skip 2407 @param[in] skip_indices Array of indices of sub-operators to skip 2408 @param[out] mult Vector to store multiplicity (of size `l_size` ) 2409 2410 @return An error code: 0 - success, otherwise - failure 2411 2412 @ref User 2413 **/ 2414 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2415 Ceed ceed; 2416 CeedInt num_suboperators; 2417 CeedSize l_vec_len; 2418 CeedScalar *mult_array; 2419 CeedVector ones_l_vec; 2420 CeedElemRestriction elem_rstr, mult_elem_rstr; 2421 CeedOperator *sub_operators; 2422 2423 CeedCall(CeedOperatorCheckReady(op)); 2424 2425 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2426 2427 // Zero mult vector 2428 CeedCall(CeedVectorSetValue(mult, 0.0)); 2429 2430 // Get suboperators 2431 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2432 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2433 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2434 2435 // Work vector 2436 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2437 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2438 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2439 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2440 2441 // Compute multiplicity across suboperators 2442 for (CeedInt i = 0; i < num_suboperators; i++) { 2443 const CeedScalar *sub_mult_array; 2444 CeedVector sub_mult_l_vec, ones_e_vec; 2445 2446 // -- Check for suboperator to skip 2447 for (CeedInt j = 0; j < num_skip_indices; j++) { 2448 if (skip_indices[j] == i) continue; 2449 } 2450 2451 // -- Sub operator multiplicity 2452 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2453 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); 2454 CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2455 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2456 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2457 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2458 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2459 // ---- Flag every node present in the current suboperator 2460 for (CeedInt j = 0; j < l_vec_len; j++) { 2461 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2462 } 2463 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2464 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2465 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2466 CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr)); 2467 } 2468 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2469 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2470 return CEED_ERROR_SUCCESS; 2471 } 2472 2473 /** 2474 @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`, creating the prolongation basis from the fine and coarse grid interpolation. 2475 2476 Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable. 2477 2478 @param[in] op_fine Fine grid `CeedOperator` 2479 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator` 2480 @param[in] rstr_coarse Coarse grid `CeedElemRestriction` 2481 @param[in] basis_coarse Coarse grid active vector `CeedBasis` 2482 @param[out] op_coarse Coarse grid `CeedOperator` 2483 @param[out] op_prolong Coarse to fine `CeedOperator`, or `NULL` 2484 @param[out] op_restrict Fine to coarse `CeedOperator`, or `NULL` 2485 2486 @return An error code: 0 - success, otherwise - failure 2487 2488 @ref User 2489 **/ 2490 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2491 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2492 CeedBasis basis_c_to_f = NULL; 2493 2494 CeedCall(CeedOperatorCheckReady(op_fine)); 2495 2496 // Build prolongation matrix, if required 2497 if (op_prolong || op_restrict) { 2498 CeedBasis basis_fine; 2499 2500 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2501 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2502 } 2503 2504 // Core code 2505 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2506 return CEED_ERROR_SUCCESS; 2507 } 2508 2509 /** 2510 @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a tensor basis for the active basis. 2511 2512 Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable. 2513 2514 @param[in] op_fine Fine grid `CeedOperator` 2515 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator` 2516 @param[in] rstr_coarse Coarse grid `CeedElemRestriction` 2517 @param[in] basis_coarse Coarse grid active vector `CeedBasis` 2518 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator` 2519 @param[out] op_coarse Coarse grid `CeedOperator` 2520 @param[out] op_prolong Coarse to fine `CeedOperator`, or `NULL` 2521 @param[out] op_restrict Fine to coarse `CeedOperator`, or `NULL` 2522 2523 @return An error code: 0 - success, otherwise - failure 2524 2525 @ref User 2526 **/ 2527 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2528 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2529 CeedOperator *op_restrict) { 2530 Ceed ceed; 2531 CeedInt Q_f, Q_c; 2532 CeedBasis basis_fine, basis_c_to_f = NULL; 2533 2534 CeedCall(CeedOperatorCheckReady(op_fine)); 2535 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2536 2537 // Check for compatible quadrature spaces 2538 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2539 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2540 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2541 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2542 2543 // Create coarse to fine basis, if required 2544 if (op_prolong || op_restrict) { 2545 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2546 CeedScalar *q_ref, *q_weight, *grad; 2547 2548 // Check if interpolation matrix is provided 2549 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2550 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2551 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2552 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2553 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2554 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2555 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2556 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2557 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2558 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2559 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2560 CeedCall(CeedFree(&q_ref)); 2561 CeedCall(CeedFree(&q_weight)); 2562 CeedCall(CeedFree(&grad)); 2563 } 2564 2565 // Core code 2566 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2567 return CEED_ERROR_SUCCESS; 2568 } 2569 2570 /** 2571 @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a non-tensor basis for the active vector 2572 2573 Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable. 2574 2575 @param[in] op_fine Fine grid `CeedOperator` 2576 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator` 2577 @param[in] rstr_coarse Coarse grid `CeedElemRestriction` 2578 @param[in] basis_coarse Coarse grid active vector `CeedBasis` 2579 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator` 2580 @param[out] op_coarse Coarse grid `CeedOperator` 2581 @param[out] op_prolong Coarse to fine `CeedOperator`, or `NULL` 2582 @param[out] op_restrict Fine to coarse `CeedOperator`, or `NULL` 2583 2584 @return An error code: 0 - success, otherwise - failure 2585 2586 @ref User 2587 **/ 2588 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2589 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2590 CeedOperator *op_restrict) { 2591 Ceed ceed; 2592 CeedInt Q_f, Q_c; 2593 CeedBasis basis_fine, basis_c_to_f = NULL; 2594 2595 CeedCall(CeedOperatorCheckReady(op_fine)); 2596 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2597 2598 // Check for compatible quadrature spaces 2599 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2600 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2601 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2602 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2603 2604 // Coarse to fine basis 2605 if (op_prolong || op_restrict) { 2606 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2607 CeedScalar *q_ref, *q_weight, *grad; 2608 CeedElemTopology topo; 2609 2610 // Check if interpolation matrix is provided 2611 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2612 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2613 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2614 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2615 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2616 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2617 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2618 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2619 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2620 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2621 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2622 CeedCall(CeedFree(&q_ref)); 2623 CeedCall(CeedFree(&q_weight)); 2624 CeedCall(CeedFree(&grad)); 2625 } 2626 2627 // Core code 2628 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2629 return CEED_ERROR_SUCCESS; 2630 } 2631 2632 /** 2633 @brief Build a FDM based approximate inverse for each element for a `CeedOperator`. 2634 2635 This returns a `CeedOperator` and `CeedVector` to apply a Fast Diagonalization Method based approximate inverse. 2636 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2637 The assembled `CeedQFunction` is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T \hat S V\f$. 2638 The `CeedOperator` must be linear and non-composite. 2639 The associated `CeedQFunction` must therefore also be linear. 2640 2641 Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable. 2642 2643 @param[in] op `CeedOperator` to create element inverses 2644 @param[out] fdm_inv `CeedOperator` to apply the action of a FDM based inverse for each element 2645 @param[in] request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2646 2647 @return An error code: 0 - success, otherwise - failure 2648 2649 @ref User 2650 **/ 2651 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2652 Ceed ceed, ceed_parent; 2653 bool interp = false, grad = false, is_tensor_basis = true; 2654 CeedInt num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2655 CeedSize l_size = 1; 2656 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg; 2657 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2658 CeedVector q_data; 2659 CeedElemRestriction rstr = NULL, rstr_qd_i; 2660 CeedBasis basis = NULL, fdm_basis; 2661 CeedQFunctionContext ctx_fdm; 2662 CeedQFunctionField *qf_fields; 2663 CeedQFunction qf, qf_fdm; 2664 CeedOperatorField *op_fields; 2665 2666 CeedCall(CeedOperatorCheckReady(op)); 2667 2668 if (op->CreateFDMElementInverse) { 2669 // Backend version 2670 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2671 return CEED_ERROR_SUCCESS; 2672 } else { 2673 // Operator fallback 2674 CeedOperator op_fallback; 2675 2676 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2677 if (op_fallback) { 2678 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2679 return CEED_ERROR_SUCCESS; 2680 } 2681 } 2682 2683 // Default interface implementation 2684 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2685 CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent)); 2686 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2687 2688 // Determine active input basis 2689 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2690 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2691 for (CeedInt i = 0; i < num_input_fields; i++) { 2692 CeedVector vec; 2693 2694 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2695 if (vec == CEED_VECTOR_ACTIVE) { 2696 CeedEvalMode eval_mode; 2697 2698 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2699 interp = interp || eval_mode == CEED_EVAL_INTERP; 2700 grad = grad || eval_mode == CEED_EVAL_GRAD; 2701 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2702 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2703 } 2704 } 2705 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2706 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2707 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2708 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2709 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2710 CeedCall(CeedBasisGetDimension(basis, &dim)); 2711 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2712 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2713 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2714 2715 // Build and diagonalize 1D Mass and Laplacian 2716 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2717 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2718 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2719 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2720 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2721 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2722 CeedCall(CeedCalloc(P_1d, &lambda)); 2723 // -- Build matrices 2724 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2725 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2726 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2727 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2728 2729 // -- Diagonalize 2730 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2731 CeedCall(CeedFree(&mass)); 2732 CeedCall(CeedFree(&laplace)); 2733 for (CeedInt i = 0; i < P_1d; i++) { 2734 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2735 } 2736 CeedCall(CeedFree(&x)); 2737 2738 { 2739 CeedInt layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2740 CeedScalar max_norm = 0; 2741 const CeedScalar *assembled_array, *q_weight_array; 2742 CeedVector assembled = NULL, q_weight; 2743 CeedElemRestriction rstr_qf = NULL; 2744 2745 // Assemble QFunction 2746 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2747 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, layout)); 2748 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2749 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2750 2751 // Calculate element averages 2752 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2753 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2754 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2755 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2756 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2757 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2758 2759 for (CeedInt e = 0; e < num_elem; e++) { 2760 CeedInt count = 0; 2761 2762 for (CeedInt q = 0; q < num_qpts; q++) { 2763 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2764 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2765 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2766 count++; 2767 } 2768 } 2769 } 2770 if (count) { 2771 elem_avg[e] /= count; 2772 } else { 2773 elem_avg[e] = 1.0; 2774 } 2775 } 2776 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2777 CeedCall(CeedVectorDestroy(&assembled)); 2778 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2779 CeedCall(CeedVectorDestroy(&q_weight)); 2780 } 2781 2782 // Build FDM diagonal 2783 { 2784 CeedScalar *q_data_array, *fdm_diagonal; 2785 2786 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2787 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2788 for (CeedInt c = 0; c < num_comp; c++) { 2789 for (CeedInt n = 0; n < num_nodes; n++) { 2790 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2791 if (grad) { 2792 for (CeedInt d = 0; d < dim; d++) { 2793 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2794 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2795 } 2796 } 2797 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2798 } 2799 } 2800 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2801 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2802 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2803 for (CeedInt e = 0; e < num_elem; e++) { 2804 for (CeedInt c = 0; c < num_comp; c++) { 2805 for (CeedInt n = 0; n < num_nodes; n++) 2806 q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2807 } 2808 } 2809 CeedCall(CeedFree(&elem_avg)); 2810 CeedCall(CeedFree(&fdm_diagonal)); 2811 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2812 } 2813 2814 // Setup FDM operator 2815 // -- Basis 2816 { 2817 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2818 2819 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2820 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2821 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2822 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2823 CeedCall(CeedFree(&fdm_interp)); 2824 CeedCall(CeedFree(&grad_dummy)); 2825 CeedCall(CeedFree(&q_ref_dummy)); 2826 CeedCall(CeedFree(&q_weight_dummy)); 2827 CeedCall(CeedFree(&lambda)); 2828 } 2829 2830 // -- Restriction 2831 { 2832 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2833 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2834 } 2835 2836 // -- QFunction 2837 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2838 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2839 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2840 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2841 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2842 2843 // -- QFunction context 2844 { 2845 CeedInt *num_comp_data; 2846 2847 CeedCall(CeedCalloc(1, &num_comp_data)); 2848 num_comp_data[0] = num_comp; 2849 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2850 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2851 } 2852 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2853 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2854 2855 // -- Operator 2856 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2857 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2858 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data)); 2859 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2860 2861 // Cleanup 2862 CeedCall(CeedVectorDestroy(&q_data)); 2863 CeedCall(CeedBasisDestroy(&fdm_basis)); 2864 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2865 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2866 return CEED_ERROR_SUCCESS; 2867 } 2868 2869 /// @} 2870