1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 char *source_path_with_name = NULL; 39 40 // Check if NULL qf passed in 41 if (!qf) return CEED_ERROR_SUCCESS; 42 43 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 44 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 45 46 if (qf->source_path) { 47 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 48 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 49 memcpy(source_path_with_name, qf->source_path, path_len); 50 memcpy(&source_path_with_name[path_len], ":", 1); 51 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 52 } else { 53 CeedCall(CeedCalloc(1, &source_path_with_name)); 54 } 55 56 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 57 { 58 CeedQFunctionContext ctx; 59 60 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 61 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 62 } 63 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 64 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 65 } 66 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 67 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 68 } 69 CeedCall(CeedFree(&source_path_with_name)); 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 Ceed ceed_fallback; 84 bool is_composite; 85 CeedOperator op_fallback; 86 87 // Check not already created 88 if (op->op_fallback) return CEED_ERROR_SUCCESS; 89 90 // Fallback Ceed 91 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 92 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 93 94 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 95 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 96 97 // Clone Op 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 117 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 118 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 119 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 120 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 121 op->input_fields[i]->vec)); 122 } 123 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 124 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 125 op->output_fields[i]->vec)); 126 } 127 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 128 // Cleanup 129 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 130 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 131 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 132 } 133 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 134 CeedCall(CeedOperatorCheckReady(op_fallback)); 135 // Note: No ref-counting here so we don't get caught in a reference loop. 136 // The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback. 137 op->op_fallback = op_fallback; 138 op_fallback->op_fallback_parent = op; 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Select correct basis matrix pointer based on CeedEvalMode 144 145 @param[in] basis CeedBasis from which to get the basis matrix 146 @param[in] eval_mode Current basis evaluation mode 147 @param[in] identity Pointer to identity matrix 148 @param[out] basis_ptr Basis pointer to set 149 150 @ref Developer 151 **/ 152 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 153 switch (eval_mode) { 154 case CEED_EVAL_NONE: 155 *basis_ptr = identity; 156 break; 157 case CEED_EVAL_INTERP: 158 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 159 break; 160 case CEED_EVAL_GRAD: 161 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 162 break; 163 case CEED_EVAL_DIV: 164 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 165 break; 166 case CEED_EVAL_CURL: 167 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 168 break; 169 case CEED_EVAL_WEIGHT: 170 break; // Caught by QF Assembly 171 } 172 assert(*basis_ptr != NULL); 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 181 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 182 @param[out] assembled CeedVector to store assembled diagonal 183 184 @return An error code: 0 - success, otherwise - failure 185 186 @ref Developer 187 **/ 188 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) { 189 Ceed ceed; 190 bool is_composite; 191 192 CeedCall(CeedOperatorGetCeed(op, &ceed)); 193 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 194 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 195 196 // Assemble QFunction 197 CeedInt layout_qf[3]; 198 const CeedScalar *assembled_qf_array; 199 CeedVector assembled_qf = NULL; 200 CeedElemRestriction assembled_elem_rstr = NULL; 201 202 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 203 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 204 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 205 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 206 207 // Get assembly data 208 const CeedEvalMode **eval_modes_in, **eval_modes_out; 209 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 210 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 211 CeedBasis *active_bases_in, *active_bases_out; 212 CeedElemRestriction *active_elem_rstrs_in, *active_elem_rstrs_out; 213 CeedOperatorAssemblyData data; 214 215 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 216 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 217 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, 218 &num_output_components)); 219 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL)); 220 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out)); 221 222 // Loop over all active bases (find matching input/output pairs) 223 for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) { 224 CeedInt b_in, b_out, num_elem, num_nodes, num_qpts, num_comp; 225 bool has_eval_none = false; 226 CeedScalar *elem_diag_array, *identity = NULL; 227 CeedVector elem_diag; 228 CeedElemRestriction diag_elem_rstr; 229 230 if (num_active_bases_in <= num_active_bases_out) { 231 b_in = b; 232 for (b_out = 0; b_out < num_active_bases_out; b_out++) { 233 if (active_bases_in[b_in] == active_bases_out[b_out]) { 234 break; 235 } 236 } 237 if (b_out == num_active_bases_out) { 238 continue; 239 } // No matching output basis found 240 } else { 241 b_out = b; 242 for (b_in = 0; b_in < num_active_bases_in; b_in++) { 243 if (active_bases_in[b_in] == active_bases_out[b_out]) { 244 break; 245 } 246 } 247 if (b_in == num_active_bases_in) { 248 continue; 249 } // No matching output basis found 250 } 251 CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED, 252 "Cannot assemble operator diagonal with different input and output active element restrictions"); 253 254 // Assemble point block diagonal restriction, if needed 255 if (is_point_block) { 256 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 257 } else { 258 CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 259 } 260 261 // Create diagonal vector 262 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 263 264 // Assemble element operator diagonals 265 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 266 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 267 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 268 CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes)); 269 CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp)); 270 if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes; 271 else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts)); 272 273 // Construct identity matrix for basis if required 274 for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) { 275 has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE); 276 } 277 for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) { 278 has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE); 279 } 280 if (has_eval_none) { 281 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 282 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 283 } 284 285 // Compute the diagonal of B^T D B 286 // Each element 287 for (CeedSize e = 0; e < num_elem; e++) { 288 // Each basis eval mode pair 289 CeedInt d_out = 0, q_comp_out; 290 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 291 292 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) { 293 CeedInt d_in = 0, q_comp_in; 294 const CeedScalar *B_t = NULL; 295 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 296 297 CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t)); 298 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out)); 299 if (q_comp_out > 1) { 300 if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0; 301 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 302 } 303 eval_mode_out_prev = eval_modes_out[b_out][e_out]; 304 305 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) { 306 const CeedScalar *B = NULL; 307 308 CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B)); 309 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in)); 310 if (q_comp_in > 1) { 311 if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0; 312 else B = &B[(++d_in) * num_qpts * num_nodes]; 313 } 314 eval_mode_in_prev = eval_modes_in[b_in][e_in]; 315 316 // Each component 317 for (CeedInt c_out = 0; c_out < num_comp; c_out++) { 318 // Each qpt/node pair 319 for (CeedInt q = 0; q < num_qpts; q++) { 320 if (is_point_block) { 321 // Point Block Diagonal 322 for (CeedInt c_in = 0; c_in < num_comp; c_in++) { 323 const CeedSize c_offset = 324 (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 325 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 326 327 for (CeedInt n = 0; n < num_nodes; n++) { 328 elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] += 329 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 330 } 331 } 332 } else { 333 // Diagonal Only 334 const CeedInt c_offset = 335 (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 336 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 337 338 for (CeedInt n = 0; n < num_nodes; n++) { 339 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 340 } 341 } 342 } 343 } 344 } 345 } 346 } 347 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 348 349 // Assemble local operator diagonal 350 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 351 352 // Cleanup 353 CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 354 CeedCall(CeedVectorDestroy(&elem_diag)); 355 CeedCall(CeedFree(&identity)); 356 } 357 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 358 CeedCall(CeedVectorDestroy(&assembled_qf)); 359 return CEED_ERROR_SUCCESS; 360 } 361 362 /** 363 @brief Core logic for assembling composite operator diagonal 364 365 @param[in] op CeedOperator to assemble point block diagonal 366 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 367 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 368 @param[out] assembled CeedVector to store assembled diagonal 369 370 @return An error code: 0 - success, otherwise - failure 371 372 @ref Developer 373 **/ 374 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block, 375 CeedVector assembled) { 376 CeedInt num_sub; 377 CeedOperator *suboperators; 378 379 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 380 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 381 for (CeedInt i = 0; i < num_sub; i++) { 382 if (is_point_block) { 383 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 384 } else { 385 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 386 } 387 } 388 return CEED_ERROR_SUCCESS; 389 } 390 391 /** 392 @brief Build nonzero pattern for non-composite operator 393 394 Users should generally use CeedOperatorLinearAssembleSymbolic() 395 396 @param[in] op CeedOperator to assemble nonzero pattern 397 @param[in] offset Offset for number of entries 398 @param[out] rows Row number for each entry 399 @param[out] cols Column number for each entry 400 401 @return An error code: 0 - success, otherwise - failure 402 403 @ref Developer 404 **/ 405 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 406 Ceed ceed; 407 bool is_composite; 408 CeedSize num_nodes_in, num_nodes_out, count = 0; 409 CeedInt num_elem_in, elem_size_in, num_comp_in, layout_er_in[3]; 410 CeedInt num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries; 411 CeedScalar *array; 412 const CeedScalar *elem_dof_a_in, *elem_dof_a_out; 413 CeedVector index_vec_in, index_vec_out, elem_dof_in, elem_dof_out; 414 CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out; 415 416 CeedCall(CeedOperatorGetCeed(op, &ceed)); 417 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 418 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 419 420 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out)); 421 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 422 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 423 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 424 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 425 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in)); 426 427 // Determine elem_dof relation for input 428 CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in)); 429 CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array)); 430 for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i; 431 CeedCall(CeedVectorRestoreArray(index_vec_in, &array)); 432 CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in)); 433 CeedCall(CeedVectorSetValue(elem_dof_in, 0.0)); 434 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in)); 435 CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE)); 436 CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in)); 437 CeedCall(CeedVectorDestroy(&index_vec_in)); 438 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in)); 439 440 if (elem_rstr_in != elem_rstr_out) { 441 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 442 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 443 "Active input and output operator restrictions must have the same number of elements"); 444 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 445 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 446 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out)); 447 448 // Determine elem_dof relation for output 449 CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out)); 450 CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array)); 451 for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i; 452 CeedCall(CeedVectorRestoreArray(index_vec_out, &array)); 453 CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out)); 454 CeedCall(CeedVectorSetValue(elem_dof_out, 0.0)); 455 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out)); 456 CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE)); 457 CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out)); 458 CeedCall(CeedVectorDestroy(&index_vec_out)); 459 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out)); 460 } else { 461 num_elem_out = num_elem_in; 462 elem_size_out = elem_size_in; 463 num_comp_out = num_comp_in; 464 layout_er_out[0] = layout_er_in[0]; 465 layout_er_out[1] = layout_er_in[1]; 466 layout_er_out[2] = layout_er_in[2]; 467 elem_dof_a_out = elem_dof_a_in; 468 } 469 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 470 471 // Determine i, j locations for element matrices 472 for (CeedInt e = 0; e < num_elem_in; e++) { 473 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 474 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 475 for (CeedInt i = 0; i < elem_size_out; i++) { 476 for (CeedInt j = 0; j < elem_size_in; j++) { 477 const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2]; 478 const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2]; 479 const CeedInt row = elem_dof_a_out[elem_dof_index_row]; 480 const CeedInt col = elem_dof_a_in[elem_dof_index_col]; 481 482 rows[offset + count] = row; 483 cols[offset + count] = col; 484 count++; 485 } 486 } 487 } 488 } 489 } 490 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 491 CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in)); 492 CeedCall(CeedVectorDestroy(&elem_dof_in)); 493 if (elem_rstr_in != elem_rstr_out) { 494 CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out)); 495 CeedCall(CeedVectorDestroy(&elem_dof_out)); 496 } 497 return CEED_ERROR_SUCCESS; 498 } 499 500 /** 501 @brief Assemble nonzero entries for non-composite operator 502 503 Users should generally use CeedOperatorLinearAssemble() 504 505 @param[in] op CeedOperator to assemble 506 @param[in] offset Offset for number of entries 507 @param[out] values Values to assemble into matrix 508 509 @return An error code: 0 - success, otherwise - failure 510 511 @ref Developer 512 **/ 513 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 514 Ceed ceed; 515 bool is_composite; 516 517 CeedCall(CeedOperatorGetCeed(op, &ceed)); 518 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 519 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 520 521 // Early exit for empty operator 522 { 523 CeedInt num_elem = 0; 524 525 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 526 if (num_elem == 0) return CEED_ERROR_SUCCESS; 527 } 528 529 if (op->LinearAssembleSingle) { 530 // Backend version 531 CeedCall(op->LinearAssembleSingle(op, offset, values)); 532 return CEED_ERROR_SUCCESS; 533 } else { 534 // Operator fallback 535 CeedOperator op_fallback; 536 537 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 538 if (op_fallback) { 539 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 540 return CEED_ERROR_SUCCESS; 541 } 542 } 543 544 // Assemble QFunction 545 CeedInt layout_qf[3]; 546 const CeedScalar *assembled_qf_array; 547 CeedVector assembled_qf = NULL; 548 CeedElemRestriction assembled_elem_rstr = NULL; 549 550 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE)); 551 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 552 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 553 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 554 555 // Get assembly data 556 CeedInt num_elem_in, elem_size_in, num_comp_in, num_qpts_in; 557 CeedInt num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries; 558 const CeedEvalMode **eval_modes_in, **eval_modes_out; 559 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 560 CeedBasis *active_bases_in, *active_bases_out, basis_in, basis_out; 561 const CeedScalar **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out; 562 CeedElemRestriction elem_rstr_in, elem_rstr_out; 563 CeedRestrictionType elem_rstr_type_in, elem_rstr_type_out; 564 const bool *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL; 565 const CeedInt8 *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL; 566 CeedOperatorAssemblyData data; 567 568 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 569 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out, 570 &num_eval_modes_out, &eval_modes_out, NULL, NULL)); 571 572 CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED, 573 "Cannot assemble operator with multiple active bases"); 574 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); 575 576 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out)); 577 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 578 basis_in = active_bases_in[0]; 579 basis_out = active_bases_out[0]; 580 B_mat_in = B_mats_in[0]; 581 B_mat_out = B_mats_out[0]; 582 583 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 584 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 585 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 586 if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in; 587 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in)); 588 589 CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in)); 590 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 591 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in)); 592 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 593 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in)); 594 } 595 596 if (elem_rstr_in != elem_rstr_out) { 597 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 598 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 599 "Active input and output operator restrictions must have the same number of elements"); 600 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 601 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 602 if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out; 603 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out)); 604 CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED, 605 "Active input and output bases must have the same number of quadrature points"); 606 607 CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out)); 608 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 609 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out)); 610 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 611 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out)); 612 } 613 } else { 614 num_elem_out = num_elem_in; 615 elem_size_out = elem_size_in; 616 num_comp_out = num_comp_in; 617 num_qpts_out = num_qpts_in; 618 619 elem_rstr_orients_out = elem_rstr_orients_in; 620 elem_rstr_curl_orients_out = elem_rstr_curl_orients_in; 621 } 622 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 623 624 // Loop over elements and put in data structure 625 // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 626 CeedSize count = 0; 627 CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL; 628 629 CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat)); 630 CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat)); 631 if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b)); 632 633 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 634 for (CeedSize e = 0; e < num_elem_in; e++) { 635 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 636 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 637 // Compute B^T*D 638 for (CeedSize n = 0; n < elem_size_out; n++) { 639 for (CeedSize q = 0; q < num_qpts_in; q++) { 640 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 641 const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in; 642 CeedScalar sum = 0.0; 643 644 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 645 const CeedSize b_out_index = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n; 646 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out; 647 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 648 649 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 650 } 651 BTD_mat[btd_index] = sum; 652 } 653 } 654 } 655 656 // Form element matrix itself (for each block component) 657 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0])); 658 659 // Transform the element matrix if required 660 if (elem_rstr_orients_out) { 661 const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out]; 662 663 for (CeedInt i = 0; i < elem_size_out; i++) { 664 const double orient = elem_orients[i] ? -1.0 : 1.0; 665 666 for (CeedInt j = 0; j < elem_size_in; j++) { 667 elem_mat[i * elem_size_in + j] *= orient; 668 } 669 } 670 } else if (elem_rstr_curl_orients_out) { 671 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out]; 672 673 // T^T*(B^T*D*B) 674 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 675 for (CeedInt i = 0; i < elem_size_out; i++) { 676 for (CeedInt j = 0; j < elem_size_in; j++) { 677 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] + 678 (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) + 679 (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0); 680 } 681 } 682 } 683 if (elem_rstr_orients_in) { 684 const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in]; 685 686 for (CeedInt i = 0; i < elem_size_out; i++) { 687 for (CeedInt j = 0; j < elem_size_in; j++) { 688 elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0; 689 } 690 } 691 } else if (elem_rstr_curl_orients_in) { 692 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in]; 693 694 // (B^T*D*B)*T 695 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 696 for (CeedInt i = 0; i < elem_size_out; i++) { 697 for (CeedInt j = 0; j < elem_size_in; j++) { 698 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] + 699 (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) + 700 (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0); 701 } 702 } 703 } 704 705 // Put element matrix in coordinate data structure 706 for (CeedInt i = 0; i < elem_size_out; i++) { 707 for (CeedInt j = 0; j < elem_size_in; j++) { 708 vals[offset + count] = elem_mat[i * elem_size_in + j]; 709 count++; 710 } 711 } 712 } 713 } 714 } 715 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 716 CeedCall(CeedVectorRestoreArray(values, &vals)); 717 718 // Cleanup 719 CeedCall(CeedFree(&BTD_mat)); 720 CeedCall(CeedFree(&elem_mat)); 721 CeedCall(CeedFree(&elem_mat_b)); 722 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 723 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in)); 724 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 725 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in)); 726 } 727 if (elem_rstr_in != elem_rstr_out) { 728 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 729 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out)); 730 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 731 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out)); 732 } 733 } 734 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 735 CeedCall(CeedVectorDestroy(&assembled_qf)); 736 return CEED_ERROR_SUCCESS; 737 } 738 739 /** 740 @brief Count number of entries for assembled CeedOperator 741 742 @param[in] op CeedOperator to assemble 743 @param[out] num_entries Number of entries in assembled representation 744 745 @return An error code: 0 - success, otherwise - failure 746 747 @ref Utility 748 **/ 749 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 750 bool is_composite; 751 CeedInt num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out; 752 CeedElemRestriction rstr_in, rstr_out; 753 754 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 755 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 756 757 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out)); 758 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in)); 759 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in)); 760 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in)); 761 if (rstr_in != rstr_out) { 762 CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out)); 763 CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED, 764 "Active input and output operator restrictions must have the same number of elements"); 765 CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out)); 766 CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out)); 767 } else { 768 num_elem_out = num_elem_in; 769 elem_size_out = elem_size_in; 770 num_comp_out = num_comp_in; 771 } 772 *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in; 773 return CEED_ERROR_SUCCESS; 774 } 775 776 /** 777 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 778 779 @param[in] op_fine Fine grid operator 780 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 781 @param[in] rstr_coarse Coarse grid restriction 782 @param[in] basis_coarse Coarse grid active vector basis 783 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 784 @param[out] op_coarse Coarse grid operator 785 @param[out] op_prolong Coarse to fine operator, or NULL 786 @param[out] op_restrict Fine to coarse operator, or NULL 787 788 @return An error code: 0 - success, otherwise - failure 789 790 @ref Developer 791 **/ 792 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 793 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 794 bool is_composite; 795 Ceed ceed; 796 CeedInt num_comp; 797 CeedVector mult_vec = NULL; 798 CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL; 799 800 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 801 802 // Check for composite operator 803 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 804 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 805 806 // Coarse Grid 807 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 808 // -- Clone input fields 809 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 810 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 811 rstr_fine = op_fine->input_fields[i]->elem_rstr; 812 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 813 } else { 814 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 815 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 816 } 817 } 818 // -- Clone output fields 819 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 820 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 821 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 822 } else { 823 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 824 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 825 } 826 } 827 // -- Clone QFunctionAssemblyData 828 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 829 830 // Multiplicity vector 831 if (op_restrict || op_prolong) { 832 CeedVector mult_e_vec; 833 CeedRestrictionType rstr_type; 834 835 CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type)); 836 CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED, 837 "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported"); 838 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 839 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 840 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 841 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 842 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 843 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 844 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 845 CeedCall(CeedVectorDestroy(&mult_e_vec)); 846 CeedCall(CeedVectorReciprocal(mult_vec)); 847 } 848 849 // Clone name 850 bool has_name = op_fine->name; 851 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 852 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 853 854 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 855 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 856 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 857 858 // Restriction/Prolongation Operators 859 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 860 861 // Restriction 862 if (op_restrict) { 863 CeedInt *num_comp_r_data; 864 CeedQFunctionContext ctx_r; 865 CeedQFunction qf_restrict; 866 867 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 868 CeedCall(CeedCalloc(1, &num_comp_r_data)); 869 num_comp_r_data[0] = num_comp; 870 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 871 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 872 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 873 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 874 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 875 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 876 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 877 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 878 879 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 880 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 881 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 882 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 883 884 // Set name 885 char *restriction_name; 886 887 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 888 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 889 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 890 CeedCall(CeedFree(&restriction_name)); 891 892 // Check 893 CeedCall(CeedOperatorCheckReady(*op_restrict)); 894 895 // Cleanup 896 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 897 } 898 899 // Prolongation 900 if (op_prolong) { 901 CeedInt *num_comp_p_data; 902 CeedQFunctionContext ctx_p; 903 CeedQFunction qf_prolong; 904 905 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 906 CeedCall(CeedCalloc(1, &num_comp_p_data)); 907 num_comp_p_data[0] = num_comp; 908 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 909 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 910 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 911 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 912 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 913 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 914 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 915 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 916 917 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 918 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 919 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 920 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 921 922 // Set name 923 char *prolongation_name; 924 925 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 926 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 927 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 928 CeedCall(CeedFree(&prolongation_name)); 929 930 // Check 931 CeedCall(CeedOperatorCheckReady(*op_prolong)); 932 933 // Cleanup 934 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 935 } 936 937 // Check 938 CeedCall(CeedOperatorCheckReady(*op_coarse)); 939 940 // Cleanup 941 CeedCall(CeedVectorDestroy(&mult_vec)); 942 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 943 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 944 return CEED_ERROR_SUCCESS; 945 } 946 947 /** 948 @brief Build 1D mass matrix and Laplacian with perturbation 949 950 @param[in] interp_1d Interpolation matrix in one dimension 951 @param[in] grad_1d Gradient matrix in one dimension 952 @param[in] q_weight_1d Quadrature weights in one dimension 953 @param[in] P_1d Number of basis nodes in one dimension 954 @param[in] Q_1d Number of quadrature points in one dimension 955 @param[in] dim Dimension of basis 956 @param[out] mass Assembled mass matrix in one dimension 957 @param[out] laplace Assembled perturbed Laplacian in one dimension 958 959 @return An error code: 0 - success, otherwise - failure 960 961 @ref Developer 962 **/ 963 CeedPragmaOptimizeOff 964 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 965 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 966 for (CeedInt i = 0; i < P_1d; i++) { 967 for (CeedInt j = 0; j < P_1d; j++) { 968 CeedScalar sum = 0.0; 969 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 970 mass[i + j * P_1d] = sum; 971 } 972 } 973 // -- Laplacian 974 for (CeedInt i = 0; i < P_1d; i++) { 975 for (CeedInt j = 0; j < P_1d; j++) { 976 CeedScalar sum = 0.0; 977 978 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 979 laplace[i + j * P_1d] = sum; 980 } 981 } 982 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 983 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 984 return CEED_ERROR_SUCCESS; 985 } 986 CeedPragmaOptimizeOn 987 988 /// @} 989 990 /// ---------------------------------------------------------------------------- 991 /// CeedOperator Backend API 992 /// ---------------------------------------------------------------------------- 993 /// @addtogroup CeedOperatorBackend 994 /// @{ 995 996 /** 997 @brief Create point block restriction for active operator field 998 999 @param[in] rstr Original CeedElemRestriction for active field 1000 @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored 1001 1002 @return An error code: 0 - success, otherwise - failure 1003 1004 @ref Backend 1005 **/ 1006 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) { 1007 Ceed ceed; 1008 CeedInt num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets; 1009 CeedSize l_size; 1010 const CeedInt *offsets; 1011 1012 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 1013 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 1014 1015 // Expand offsets 1016 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 1017 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 1018 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 1019 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 1020 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 1021 shift = num_comp; 1022 if (comp_stride != 1) shift *= num_comp; 1023 CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets)); 1024 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 1025 point_block_offsets[i] = offsets[i] * shift; 1026 } 1027 1028 // Create new restriction 1029 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 1030 point_block_offsets, point_block_rstr)); 1031 1032 // Cleanup 1033 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 1034 return CEED_ERROR_SUCCESS; 1035 } 1036 1037 /** 1038 @brief Create object holding CeedQFunction assembly data for CeedOperator 1039 1040 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1041 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 1042 1043 @return An error code: 0 - success, otherwise - failure 1044 1045 @ref Backend 1046 **/ 1047 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 1048 CeedCall(CeedCalloc(1, data)); 1049 (*data)->ref_count = 1; 1050 (*data)->ceed = ceed; 1051 CeedCall(CeedReference(ceed)); 1052 return CEED_ERROR_SUCCESS; 1053 } 1054 1055 /** 1056 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1057 1058 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 1059 1060 @return An error code: 0 - success, otherwise - failure 1061 1062 @ref Backend 1063 **/ 1064 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1065 data->ref_count++; 1066 return CEED_ERROR_SUCCESS; 1067 } 1068 1069 /** 1070 @brief Set re-use of CeedQFunctionAssemblyData 1071 1072 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 1073 @param[in] reuse_data Boolean flag indicating data re-use 1074 1075 @return An error code: 0 - success, otherwise - failure 1076 1077 @ref Backend 1078 **/ 1079 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 1080 data->reuse_data = reuse_data; 1081 data->needs_data_update = true; 1082 return CEED_ERROR_SUCCESS; 1083 } 1084 1085 /** 1086 @brief Mark QFunctionAssemblyData as stale 1087 1088 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 1089 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 1090 1091 @return An error code: 0 - success, otherwise - failure 1092 1093 @ref Backend 1094 **/ 1095 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 1096 data->needs_data_update = needs_data_update; 1097 return CEED_ERROR_SUCCESS; 1098 } 1099 1100 /** 1101 @brief Determine if QFunctionAssemblyData needs update 1102 1103 @param[in] data CeedQFunctionAssemblyData to mark as stale 1104 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1105 1106 @return An error code: 0 - success, otherwise - failure 1107 1108 @ref Backend 1109 **/ 1110 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1111 *is_update_needed = !data->reuse_data || data->needs_data_update; 1112 return CEED_ERROR_SUCCESS; 1113 } 1114 1115 /** 1116 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1117 1118 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1119 1120 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1121 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1122 CeedQFunctionAssemblyData. 1123 1124 @param[in] data CeedQFunctionAssemblyData to copy reference to 1125 @param[in,out] data_copy Variable to store copied reference 1126 1127 @return An error code: 0 - success, otherwise - failure 1128 1129 @ref Backend 1130 **/ 1131 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1132 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1133 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1134 *data_copy = data; 1135 return CEED_ERROR_SUCCESS; 1136 } 1137 1138 /** 1139 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1140 1141 @param[in] data CeedQFunctionAssemblyData to retrieve status 1142 @param[out] is_setup Boolean flag for setup status 1143 1144 @return An error code: 0 - success, otherwise - failure 1145 1146 @ref Backend 1147 **/ 1148 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1149 *is_setup = data->is_setup; 1150 return CEED_ERROR_SUCCESS; 1151 } 1152 1153 /** 1154 @brief Set internal objects for CeedQFunctionAssemblyData 1155 1156 @param[in,out] data CeedQFunctionAssemblyData to set objects 1157 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1158 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1159 1160 @return An error code: 0 - success, otherwise - failure 1161 1162 @ref Backend 1163 **/ 1164 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1165 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1166 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1167 1168 data->is_setup = true; 1169 return CEED_ERROR_SUCCESS; 1170 } 1171 1172 /** 1173 @brief Get internal objects for CeedQFunctionAssemblyData 1174 1175 @param[in,out] data CeedQFunctionAssemblyData to set objects 1176 @param[out] vec CeedVector to store assembled CeedQFunction at quadrature points 1177 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1178 1179 @return An error code: 0 - success, otherwise - failure 1180 1181 @ref Backend 1182 **/ 1183 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1184 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1185 1186 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1187 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1188 return CEED_ERROR_SUCCESS; 1189 } 1190 1191 /** 1192 @brief Destroy CeedQFunctionAssemblyData 1193 1194 @param[in,out] data CeedQFunctionAssemblyData to destroy 1195 1196 @return An error code: 0 - success, otherwise - failure 1197 1198 @ref Backend 1199 **/ 1200 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1201 if (!*data || --(*data)->ref_count > 0) { 1202 *data = NULL; 1203 return CEED_ERROR_SUCCESS; 1204 } 1205 CeedCall(CeedDestroy(&(*data)->ceed)); 1206 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1207 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1208 1209 CeedCall(CeedFree(data)); 1210 return CEED_ERROR_SUCCESS; 1211 } 1212 1213 /** 1214 @brief Get CeedOperatorAssemblyData 1215 1216 @param[in] op CeedOperator to assemble 1217 @param[out] data CeedQFunctionAssemblyData 1218 1219 @return An error code: 0 - success, otherwise - failure 1220 1221 @ref Backend 1222 **/ 1223 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1224 if (!op->op_assembled) { 1225 CeedOperatorAssemblyData data; 1226 1227 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1228 op->op_assembled = data; 1229 } 1230 *data = op->op_assembled; 1231 return CEED_ERROR_SUCCESS; 1232 } 1233 1234 /** 1235 @brief Create object holding CeedOperator assembly data. 1236 1237 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1238 An array with references to the corresponding active CeedElemRestrictions is also stored. 1239 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1240 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1241 CeedEvalMode. 1242 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1243 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1244 1245 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1246 @param[in] op CeedOperator to be assembled 1247 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1248 1249 @return An error code: 0 - success, otherwise - failure 1250 1251 @ref Backend 1252 **/ 1253 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1254 CeedInt num_active_bases_in = 0, num_active_bases_out = 0, offset = 0; 1255 CeedInt num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL; 1256 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1257 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1258 CeedQFunctionField *qf_fields; 1259 CeedQFunction qf; 1260 CeedOperatorField *op_fields; 1261 bool is_composite; 1262 1263 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1264 CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators."); 1265 1266 // Allocate 1267 CeedCall(CeedCalloc(1, data)); 1268 (*data)->ceed = ceed; 1269 CeedCall(CeedReference(ceed)); 1270 1271 // Build OperatorAssembly data 1272 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1273 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1274 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1275 1276 // Determine active input basis 1277 for (CeedInt i = 0; i < num_input_fields; i++) { 1278 CeedVector vec; 1279 1280 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1281 if (vec == CEED_VECTOR_ACTIVE) { 1282 CeedInt index = -1, num_comp, q_comp; 1283 CeedEvalMode eval_mode; 1284 CeedBasis basis_in = NULL; 1285 1286 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1287 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1288 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1289 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1290 for (CeedInt i = 0; i < num_active_bases_in; i++) { 1291 if ((*data)->active_bases_in[i] == basis_in) index = i; 1292 } 1293 if (index == -1) { 1294 CeedElemRestriction elem_rstr_in; 1295 1296 index = num_active_bases_in; 1297 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in)); 1298 (*data)->active_bases_in[num_active_bases_in] = NULL; 1299 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in])); 1300 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in)); 1301 (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL; 1302 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1303 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in])); 1304 CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in)); 1305 num_eval_modes_in[index] = 0; 1306 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in)); 1307 eval_modes_in[index] = NULL; 1308 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in)); 1309 eval_mode_offsets_in[index] = NULL; 1310 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in)); 1311 (*data)->assembled_bases_in[index] = NULL; 1312 num_active_bases_in++; 1313 } 1314 if (eval_mode != CEED_EVAL_WEIGHT) { 1315 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1316 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1317 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1318 for (CeedInt d = 0; d < q_comp; d++) { 1319 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1320 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1321 offset += num_comp; 1322 } 1323 num_eval_modes_in[index] += q_comp; 1324 } 1325 } 1326 } 1327 1328 // Determine active output basis 1329 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1330 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1331 offset = 0; 1332 for (CeedInt i = 0; i < num_output_fields; i++) { 1333 CeedVector vec; 1334 1335 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1336 if (vec == CEED_VECTOR_ACTIVE) { 1337 CeedInt index = -1, num_comp, q_comp; 1338 CeedEvalMode eval_mode; 1339 CeedBasis basis_out = NULL; 1340 1341 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1342 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1343 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1344 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1345 for (CeedInt i = 0; i < num_active_bases_out; i++) { 1346 if ((*data)->active_bases_out[i] == basis_out) index = i; 1347 } 1348 if (index == -1) { 1349 CeedElemRestriction elem_rstr_out; 1350 1351 index = num_active_bases_out; 1352 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out)); 1353 (*data)->active_bases_out[num_active_bases_out] = NULL; 1354 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out])); 1355 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out)); 1356 (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL; 1357 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1358 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out])); 1359 CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out)); 1360 num_eval_modes_out[index] = 0; 1361 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out)); 1362 eval_modes_out[index] = NULL; 1363 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out)); 1364 eval_mode_offsets_out[index] = NULL; 1365 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out)); 1366 (*data)->assembled_bases_out[index] = NULL; 1367 num_active_bases_out++; 1368 } 1369 if (eval_mode != CEED_EVAL_WEIGHT) { 1370 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1371 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1372 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1373 for (CeedInt d = 0; d < q_comp; d++) { 1374 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1375 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1376 offset += num_comp; 1377 } 1378 num_eval_modes_out[index] += q_comp; 1379 } 1380 } 1381 } 1382 (*data)->num_active_bases_in = num_active_bases_in; 1383 (*data)->num_eval_modes_in = num_eval_modes_in; 1384 (*data)->eval_modes_in = eval_modes_in; 1385 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1386 (*data)->num_active_bases_out = num_active_bases_out; 1387 (*data)->num_eval_modes_out = num_eval_modes_out; 1388 (*data)->eval_modes_out = eval_modes_out; 1389 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1390 (*data)->num_output_components = offset; 1391 return CEED_ERROR_SUCCESS; 1392 } 1393 1394 /** 1395 @brief Get CeedOperator CeedEvalModes for assembly. 1396 1397 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1398 1399 @param[in] data CeedOperatorAssemblyData 1400 @param[out] num_active_bases_in Total number of active bases for input 1401 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1402 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1403 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1404 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1405 @param[out] num_active_bases_out Total number of active bases for output 1406 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1407 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1408 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1409 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1410 including contributions of all active bases 1411 1412 @return An error code: 0 - success, otherwise - failure 1413 1414 @ref Backend 1415 **/ 1416 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in, 1417 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out, 1418 CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, 1419 CeedSize *num_output_components) { 1420 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1421 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1422 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1423 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1424 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1425 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1426 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1427 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1428 if (num_output_components) *num_output_components = data->num_output_components; 1429 return CEED_ERROR_SUCCESS; 1430 } 1431 1432 /** 1433 @brief Get CeedOperator CeedBasis data for assembly. 1434 1435 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1436 1437 @param[in] data CeedOperatorAssemblyData 1438 @param[out] num_active_bases_in Number of active input bases, or NULL 1439 @param[out] active_bases_in Pointer to hold active input CeedBasis, or NULL 1440 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1441 @param[out] num_active_bases_out Number of active output bases, or NULL 1442 @param[out] active_bases_out Pointer to hold active output CeedBasis, or NULL 1443 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1444 1445 @return An error code: 0 - success, otherwise - failure 1446 1447 @ref Backend 1448 **/ 1449 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in, 1450 const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out, 1451 const CeedScalar ***assembled_bases_out) { 1452 // Assemble B_in, B_out if needed 1453 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1454 CeedInt num_qpts; 1455 1456 if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts)); 1457 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts)); 1458 for (CeedInt b = 0; b < data->num_active_bases_in; b++) { 1459 bool has_eval_none = false; 1460 CeedInt num_nodes; 1461 CeedScalar *B_in = NULL, *identity = NULL; 1462 1463 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes)); 1464 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1465 1466 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1467 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1468 } 1469 if (has_eval_none) { 1470 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1471 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1472 identity[i * num_nodes + i] = 1.0; 1473 } 1474 } 1475 1476 for (CeedInt q = 0; q < num_qpts; q++) { 1477 for (CeedInt n = 0; n < num_nodes; n++) { 1478 CeedInt d_in = 0, q_comp_in; 1479 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1480 1481 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1482 const CeedInt qq = data->num_eval_modes_in[b] * q; 1483 const CeedScalar *B = NULL; 1484 1485 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B)); 1486 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1487 if (q_comp_in > 1) { 1488 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1489 else B = &B[(++d_in) * num_qpts * num_nodes]; 1490 } 1491 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1492 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1493 } 1494 } 1495 } 1496 if (identity) CeedCall(CeedFree(&identity)); 1497 data->assembled_bases_in[b] = B_in; 1498 } 1499 } 1500 1501 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1502 CeedInt num_qpts; 1503 1504 if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts)); 1505 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts)); 1506 for (CeedInt b = 0; b < data->num_active_bases_out; b++) { 1507 bool has_eval_none = false; 1508 CeedInt num_nodes; 1509 CeedScalar *B_out = NULL, *identity = NULL; 1510 1511 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes)); 1512 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1513 1514 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1515 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1516 } 1517 if (has_eval_none) { 1518 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1519 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1520 identity[i * num_nodes + i] = 1.0; 1521 } 1522 } 1523 1524 for (CeedInt q = 0; q < num_qpts; q++) { 1525 for (CeedInt n = 0; n < num_nodes; n++) { 1526 CeedInt d_out = 0, q_comp_out; 1527 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1528 1529 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1530 const CeedInt qq = data->num_eval_modes_out[b] * q; 1531 const CeedScalar *B = NULL; 1532 1533 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B)); 1534 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1535 if (q_comp_out > 1) { 1536 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1537 else B = &B[(++d_out) * num_qpts * num_nodes]; 1538 } 1539 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1540 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1541 } 1542 } 1543 } 1544 if (identity) CeedCall(CeedFree(&identity)); 1545 data->assembled_bases_out[b] = B_out; 1546 } 1547 } 1548 1549 // Pass out assembled data 1550 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1551 if (active_bases_in) *active_bases_in = data->active_bases_in; 1552 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1553 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1554 if (active_bases_out) *active_bases_out = data->active_bases_out; 1555 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1556 return CEED_ERROR_SUCCESS; 1557 } 1558 1559 /** 1560 @brief Get CeedOperator CeedBasis data for assembly. 1561 1562 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1563 1564 @param[in] data CeedOperatorAssemblyData 1565 @param[out] num_active_elem_rstrs_in Number of active input element restrictions, or NULL 1566 @param[out] active_elem_rstrs_in Pointer to hold active input CeedElemRestrictions, or NULL 1567 @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL 1568 @param[out] active_elem_rstrs_out Pointer to hold active output CeedElemRestrictions, or NULL 1569 1570 @return An error code: 0 - success, otherwise - failure 1571 1572 @ref Backend 1573 **/ 1574 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in, 1575 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out, 1576 CeedElemRestriction **active_elem_rstrs_out) { 1577 if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in; 1578 if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in; 1579 if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out; 1580 if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out; 1581 return CEED_ERROR_SUCCESS; 1582 } 1583 1584 /** 1585 @brief Destroy CeedOperatorAssemblyData 1586 1587 @param[in,out] data CeedOperatorAssemblyData to destroy 1588 1589 @return An error code: 0 - success, otherwise - failure 1590 1591 @ref Backend 1592 **/ 1593 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1594 if (!*data) { 1595 *data = NULL; 1596 return CEED_ERROR_SUCCESS; 1597 } 1598 CeedCall(CeedDestroy(&(*data)->ceed)); 1599 for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) { 1600 CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b])); 1601 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b])); 1602 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1603 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1604 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1605 } 1606 for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) { 1607 CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b])); 1608 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b])); 1609 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1610 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1611 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1612 } 1613 CeedCall(CeedFree(&(*data)->active_bases_in)); 1614 CeedCall(CeedFree(&(*data)->active_bases_out)); 1615 CeedCall(CeedFree(&(*data)->active_elem_rstrs_in)); 1616 CeedCall(CeedFree(&(*data)->active_elem_rstrs_out)); 1617 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1618 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1619 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1620 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1621 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1622 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1623 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1624 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1625 1626 CeedCall(CeedFree(data)); 1627 return CEED_ERROR_SUCCESS; 1628 } 1629 1630 /** 1631 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 1632 1633 @param[in] op CeedOperator to retrieve fallback for 1634 @param[out] op_fallback Fallback CeedOperator 1635 1636 @return An error code: 0 - success, otherwise - failure 1637 1638 @ref Backend 1639 **/ 1640 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 1641 // Create if needed 1642 if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op)); 1643 if (op->op_fallback) { 1644 bool is_debug; 1645 1646 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 1647 if (is_debug) { 1648 Ceed ceed, ceed_fallback; 1649 const char *resource, *resource_fallback; 1650 1651 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1652 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 1653 CeedCall(CeedGetResource(ceed, &resource)); 1654 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 1655 1656 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 1657 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 1658 op->op_fallback); 1659 } 1660 } 1661 *op_fallback = op->op_fallback; 1662 return CEED_ERROR_SUCCESS; 1663 } 1664 1665 /** 1666 @brief Get the parent CeedOperator for a fallback CeedOperator 1667 1668 @param[in] op CeedOperator context 1669 @param[out] parent Variable to store parent CeedOperator context 1670 1671 @return An error code: 0 - success, otherwise - failure 1672 1673 @ref Backend 1674 **/ 1675 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) { 1676 *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL; 1677 return CEED_ERROR_SUCCESS; 1678 } 1679 1680 /** 1681 @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator 1682 1683 @param[in] op CeedOperator context 1684 @param[out] parent Variable to store parent Ceed context 1685 1686 @return An error code: 0 - success, otherwise - failure 1687 1688 @ref Backend 1689 **/ 1690 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) { 1691 *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed; 1692 return CEED_ERROR_SUCCESS; 1693 } 1694 1695 /// @} 1696 1697 /// ---------------------------------------------------------------------------- 1698 /// CeedOperator Public API 1699 /// ---------------------------------------------------------------------------- 1700 /// @addtogroup CeedOperatorUser 1701 /// @{ 1702 1703 /** 1704 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1705 1706 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1707 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1708 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1709 1710 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1711 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1712 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1713 1714 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1715 1716 @param[in] op CeedOperator to assemble CeedQFunction 1717 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1718 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1719 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1720 1721 @return An error code: 0 - success, otherwise - failure 1722 1723 @ref User 1724 **/ 1725 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1726 CeedCall(CeedOperatorCheckReady(op)); 1727 1728 if (op->LinearAssembleQFunction) { 1729 // Backend version 1730 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1731 } else { 1732 // Operator fallback 1733 CeedOperator op_fallback; 1734 1735 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1736 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1737 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1738 } 1739 return CEED_ERROR_SUCCESS; 1740 } 1741 1742 /** 1743 @brief Assemble CeedQFunction and store result internally. 1744 1745 Return copied references of stored data to the caller. 1746 Caller is responsible for ownership and destruction of the copied references. 1747 See also @ref CeedOperatorLinearAssembleQFunction 1748 1749 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1750 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1751 1752 @param[in] op CeedOperator to assemble CeedQFunction 1753 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1754 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1755 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1756 1757 @return An error code: 0 - success, otherwise - failure 1758 1759 @ref User 1760 **/ 1761 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1762 int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL; 1763 CeedOperator op_assemble = NULL; 1764 CeedOperator op_fallback_parent = NULL; 1765 1766 CeedCall(CeedOperatorCheckReady(op)); 1767 1768 // Determine if fallback parent or operator has implementation 1769 CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent)); 1770 if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) { 1771 // -- Backend version for op fallback parent is faster, if it exists 1772 LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate; 1773 op_assemble = op_fallback_parent; 1774 } else if (op->LinearAssembleQFunctionUpdate) { 1775 // -- Backend version for op 1776 LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate; 1777 op_assemble = op; 1778 } 1779 1780 // Assemble QFunction 1781 if (LinearAssembleQFunctionUpdate) { 1782 // Backend or fallback parent version 1783 bool qf_assembled_is_setup; 1784 CeedVector assembled_vec = NULL; 1785 CeedElemRestriction assembled_rstr = NULL; 1786 1787 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1788 if (qf_assembled_is_setup) { 1789 bool update_needed; 1790 1791 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1792 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1793 if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request)); 1794 } else { 1795 CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request)); 1796 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1797 } 1798 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1799 1800 // Copy reference from internally held copy 1801 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1802 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1803 CeedCall(CeedVectorDestroy(&assembled_vec)); 1804 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1805 } else { 1806 // Operator fallback 1807 CeedOperator op_fallback; 1808 1809 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1810 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1811 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1812 } 1813 return CEED_ERROR_SUCCESS; 1814 } 1815 1816 /** 1817 @brief Assemble the diagonal of a square linear CeedOperator 1818 1819 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1820 1821 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1822 1823 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1824 1825 @param[in] op CeedOperator to assemble CeedQFunction 1826 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1827 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1828 1829 @return An error code: 0 - success, otherwise - failure 1830 1831 @ref User 1832 **/ 1833 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1834 bool is_composite; 1835 CeedSize input_size = 0, output_size = 0; 1836 1837 CeedCall(CeedOperatorCheckReady(op)); 1838 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1839 1840 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1841 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1842 1843 // Early exit for empty operator 1844 if (!is_composite) { 1845 CeedInt num_elem = 0; 1846 1847 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1848 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1849 } 1850 1851 if (op->LinearAssembleDiagonal) { 1852 // Backend version 1853 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1854 return CEED_ERROR_SUCCESS; 1855 } else if (op->LinearAssembleAddDiagonal) { 1856 // Backend version with zeroing first 1857 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1858 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1859 return CEED_ERROR_SUCCESS; 1860 } else { 1861 // Operator fallback 1862 CeedOperator op_fallback; 1863 1864 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1865 if (op_fallback) { 1866 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1867 return CEED_ERROR_SUCCESS; 1868 } 1869 } 1870 // Default interface implementation 1871 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1872 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1873 return CEED_ERROR_SUCCESS; 1874 } 1875 1876 /** 1877 @brief Assemble the diagonal of a square linear CeedOperator 1878 1879 This sums into a CeedVector the diagonal of a linear CeedOperator. 1880 1881 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1882 1883 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1884 1885 @param[in] op CeedOperator to assemble CeedQFunction 1886 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1887 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1888 1889 @return An error code: 0 - success, otherwise - failure 1890 1891 @ref User 1892 **/ 1893 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1894 bool is_composite; 1895 CeedSize input_size = 0, output_size = 0; 1896 1897 CeedCall(CeedOperatorCheckReady(op)); 1898 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1899 1900 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1901 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1902 1903 // Early exit for empty operator 1904 if (!is_composite) { 1905 CeedInt num_elem = 0; 1906 1907 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1908 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1909 } 1910 1911 if (op->LinearAssembleAddDiagonal) { 1912 // Backend version 1913 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1914 return CEED_ERROR_SUCCESS; 1915 } else { 1916 // Operator fallback 1917 CeedOperator op_fallback; 1918 1919 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1920 if (op_fallback) { 1921 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1922 return CEED_ERROR_SUCCESS; 1923 } 1924 } 1925 // Default interface implementation 1926 if (is_composite) { 1927 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1928 } else { 1929 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1930 } 1931 return CEED_ERROR_SUCCESS; 1932 } 1933 1934 /** 1935 @brief Fully assemble the point-block diagonal pattern of a linear operator. 1936 1937 Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal(). 1938 1939 The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the 1940 matrix in entry (i, j). 1941 Note that the (i, j) pairs are unique. 1942 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in 1943 the same ordering. 1944 1945 This will generally be slow unless your operator is low-order. 1946 1947 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1948 1949 @param[in] op CeedOperator to assemble 1950 @param[out] num_entries Number of entries in coordinate nonzero pattern 1951 @param[out] rows Row number for each entry 1952 @param[out] cols Column number for each entry 1953 1954 @ref User 1955 **/ 1956 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1957 Ceed ceed; 1958 bool is_composite; 1959 CeedInt num_active_components, num_sub_operators; 1960 CeedOperator *sub_operators; 1961 1962 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1963 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1964 1965 CeedSize input_size = 0, output_size = 0; 1966 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1967 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1968 1969 if (is_composite) { 1970 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators)); 1971 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1972 } else { 1973 sub_operators = &op; 1974 num_sub_operators = 1; 1975 } 1976 1977 // Verify operator can be assembled correctly 1978 { 1979 CeedOperatorAssemblyData data; 1980 CeedInt num_active_elem_rstrs, comp_stride; 1981 CeedElemRestriction *active_elem_rstrs; 1982 1983 // Get initial values to check against 1984 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data)); 1985 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 1986 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride)); 1987 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components)); 1988 1989 // Verify that all active element restrictions have same component stride and number of components 1990 for (CeedInt k = 0; k < num_sub_operators; k++) { 1991 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data)); 1992 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 1993 for (CeedInt i = 0; i < num_active_elem_rstrs; i++) { 1994 CeedInt comp_stride_sub, num_active_components_sub; 1995 1996 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub)); 1997 CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION, 1998 "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub); 1999 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub)); 2000 CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE, 2001 "All suboperators must have the same number of output components"); 2002 } 2003 } 2004 } 2005 *num_entries = input_size * num_active_components; 2006 CeedCall(CeedCalloc(*num_entries, rows)); 2007 CeedCall(CeedCalloc(*num_entries, cols)); 2008 2009 for (CeedInt o = 0; o < num_sub_operators; o++) { 2010 CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr; 2011 CeedInt comp_stride, num_elem, elem_size; 2012 const CeedInt *offsets, *point_block_offsets; 2013 2014 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr)); 2015 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride)); 2016 CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem)); 2017 CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size)); 2018 CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets)); 2019 2020 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr)); 2021 CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets)); 2022 2023 for (CeedSize i = 0; i < num_elem * elem_size; i++) { 2024 for (CeedInt c_out = 0; c_out < num_active_components; c_out++) { 2025 for (CeedInt c_in = 0; c_in < num_active_components; c_in++) { 2026 (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride; 2027 (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride; 2028 } 2029 } 2030 } 2031 2032 CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets)); 2033 CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets)); 2034 CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr)); 2035 } 2036 return CEED_ERROR_SUCCESS; 2037 } 2038 2039 /** 2040 @brief Assemble the point block diagonal of a square linear CeedOperator 2041 2042 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 2043 2044 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2045 2046 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2047 2048 @param[in] op CeedOperator to assemble CeedQFunction 2049 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2050 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2051 component in]. 2052 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2053 2054 @return An error code: 0 - success, otherwise - failure 2055 2056 @ref User 2057 **/ 2058 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2059 bool is_composite; 2060 CeedSize input_size = 0, output_size = 0; 2061 2062 CeedCall(CeedOperatorCheckReady(op)); 2063 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2064 2065 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2066 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2067 2068 // Early exit for empty operator 2069 if (!is_composite) { 2070 CeedInt num_elem = 0; 2071 2072 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2073 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2074 } 2075 2076 if (op->LinearAssemblePointBlockDiagonal) { 2077 // Backend version 2078 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 2079 return CEED_ERROR_SUCCESS; 2080 } else if (op->LinearAssembleAddPointBlockDiagonal) { 2081 // Backend version with zeroing first 2082 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2083 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2084 return CEED_ERROR_SUCCESS; 2085 } else { 2086 // Operator fallback 2087 CeedOperator op_fallback; 2088 2089 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2090 if (op_fallback) { 2091 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 2092 return CEED_ERROR_SUCCESS; 2093 } 2094 } 2095 // Default interface implementation 2096 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2097 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2098 return CEED_ERROR_SUCCESS; 2099 } 2100 2101 /** 2102 @brief Assemble the point block diagonal of a square linear CeedOperator 2103 2104 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 2105 2106 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2107 2108 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2109 2110 @param[in] op CeedOperator to assemble CeedQFunction 2111 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2112 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2113 component in]. 2114 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2115 2116 @return An error code: 0 - success, otherwise - failure 2117 2118 @ref User 2119 **/ 2120 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2121 bool is_composite; 2122 CeedSize input_size = 0, output_size = 0; 2123 2124 CeedCall(CeedOperatorCheckReady(op)); 2125 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2126 2127 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2128 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2129 2130 // Early exit for empty operator 2131 if (!is_composite) { 2132 CeedInt num_elem = 0; 2133 2134 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2135 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2136 } 2137 2138 if (op->LinearAssembleAddPointBlockDiagonal) { 2139 // Backend version 2140 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2141 return CEED_ERROR_SUCCESS; 2142 } else { 2143 // Operator fallback 2144 CeedOperator op_fallback; 2145 2146 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2147 if (op_fallback) { 2148 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 2149 return CEED_ERROR_SUCCESS; 2150 } 2151 } 2152 // Default interface implementation 2153 if (is_composite) { 2154 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 2155 } else { 2156 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 2157 } 2158 return CEED_ERROR_SUCCESS; 2159 } 2160 2161 /** 2162 @brief Fully assemble the nonzero pattern of a linear operator. 2163 2164 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 2165 2166 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2167 matrix in entry (i, j). 2168 Note that the (i, j) pairs are not unique and may repeat. 2169 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 2170 2171 This will generally be slow unless your operator is low-order. 2172 2173 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2174 2175 @param[in] op CeedOperator to assemble 2176 @param[out] num_entries Number of entries in coordinate nonzero pattern 2177 @param[out] rows Row number for each entry 2178 @param[out] cols Column number for each entry 2179 2180 @ref User 2181 **/ 2182 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 2183 bool is_composite; 2184 CeedInt num_suboperators, offset = 0; 2185 CeedSize single_entries; 2186 CeedOperator *sub_operators; 2187 2188 CeedCall(CeedOperatorCheckReady(op)); 2189 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2190 2191 if (op->LinearAssembleSymbolic) { 2192 // Backend version 2193 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 2194 return CEED_ERROR_SUCCESS; 2195 } else { 2196 // Operator fallback 2197 CeedOperator op_fallback; 2198 2199 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2200 if (op_fallback) { 2201 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 2202 return CEED_ERROR_SUCCESS; 2203 } 2204 } 2205 2206 // Default interface implementation 2207 2208 // Count entries and allocate rows, cols arrays 2209 *num_entries = 0; 2210 if (is_composite) { 2211 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2212 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2213 for (CeedInt k = 0; k < num_suboperators; ++k) { 2214 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2215 *num_entries += single_entries; 2216 } 2217 } else { 2218 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 2219 *num_entries += single_entries; 2220 } 2221 CeedCall(CeedCalloc(*num_entries, rows)); 2222 CeedCall(CeedCalloc(*num_entries, cols)); 2223 2224 // Assemble nonzero locations 2225 if (is_composite) { 2226 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2227 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2228 for (CeedInt k = 0; k < num_suboperators; ++k) { 2229 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 2230 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2231 offset += single_entries; 2232 } 2233 } else { 2234 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 2235 } 2236 return CEED_ERROR_SUCCESS; 2237 } 2238 2239 /** 2240 @brief Fully assemble the nonzero entries of a linear operator. 2241 2242 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 2243 2244 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2245 matrix in entry (i, j). 2246 Note that the (i, j) pairs are not unique and may repeat. 2247 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 2248 2249 This will generally be slow unless your operator is low-order. 2250 2251 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2252 2253 @param[in] op CeedOperator to assemble 2254 @param[out] values Values to assemble into matrix 2255 2256 @ref User 2257 **/ 2258 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 2259 bool is_composite; 2260 CeedInt num_suboperators, offset = 0; 2261 CeedSize single_entries = 0; 2262 CeedOperator *sub_operators; 2263 2264 CeedCall(CeedOperatorCheckReady(op)); 2265 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2266 2267 // Early exit for empty operator 2268 if (!is_composite) { 2269 CeedInt num_elem = 0; 2270 2271 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2272 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2273 } 2274 2275 if (op->LinearAssemble) { 2276 // Backend version 2277 CeedCall(op->LinearAssemble(op, values)); 2278 return CEED_ERROR_SUCCESS; 2279 } else { 2280 // Operator fallback 2281 CeedOperator op_fallback; 2282 2283 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2284 if (op_fallback) { 2285 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2286 return CEED_ERROR_SUCCESS; 2287 } 2288 } 2289 2290 // Default interface implementation 2291 CeedCall(CeedVectorSetValue(values, 0.0)); 2292 if (is_composite) { 2293 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2294 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2295 for (CeedInt k = 0; k < num_suboperators; k++) { 2296 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2297 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2298 offset += single_entries; 2299 } 2300 } else { 2301 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2302 } 2303 return CEED_ERROR_SUCCESS; 2304 } 2305 2306 /** 2307 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2308 2309 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2310 2311 @param[in] op Composite CeedOperator 2312 @param[in] num_skip_indices Number of suboperators to skip 2313 @param[in] skip_indices Array of indices of suboperators to skip 2314 @param[out] mult Vector to store multiplicity (of size l_size) 2315 2316 @return An error code: 0 - success, otherwise - failure 2317 2318 @ref User 2319 **/ 2320 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2321 Ceed ceed; 2322 CeedInt num_suboperators; 2323 CeedSize l_vec_len; 2324 CeedScalar *mult_array; 2325 CeedVector ones_l_vec; 2326 CeedElemRestriction elem_rstr, mult_elem_rstr; 2327 CeedOperator *sub_operators; 2328 2329 CeedCall(CeedOperatorCheckReady(op)); 2330 2331 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2332 2333 // Zero mult vector 2334 CeedCall(CeedVectorSetValue(mult, 0.0)); 2335 2336 // Get suboperators 2337 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2338 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2339 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2340 2341 // Work vector 2342 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2343 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2344 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2345 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2346 2347 // Compute multiplicity across suboperators 2348 for (CeedInt i = 0; i < num_suboperators; i++) { 2349 const CeedScalar *sub_mult_array; 2350 CeedVector sub_mult_l_vec, ones_e_vec; 2351 2352 // -- Check for suboperator to skip 2353 for (CeedInt j = 0; j < num_skip_indices; j++) { 2354 if (skip_indices[j] == i) continue; 2355 } 2356 2357 // -- Sub operator multiplicity 2358 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2359 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); 2360 CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2361 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2362 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2363 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2364 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2365 // ---- Flag every node present in the current suboperator 2366 for (CeedInt j = 0; j < l_vec_len; j++) { 2367 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2368 } 2369 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2370 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2371 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2372 CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr)); 2373 } 2374 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2375 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2376 return CEED_ERROR_SUCCESS; 2377 } 2378 2379 /** 2380 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2381 grid interpolation 2382 2383 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2384 2385 @param[in] op_fine Fine grid operator 2386 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2387 @param[in] rstr_coarse Coarse grid restriction 2388 @param[in] basis_coarse Coarse grid active vector basis 2389 @param[out] op_coarse Coarse grid operator 2390 @param[out] op_prolong Coarse to fine operator, or NULL 2391 @param[out] op_restrict Fine to coarse operator, or NULL 2392 2393 @return An error code: 0 - success, otherwise - failure 2394 2395 @ref User 2396 **/ 2397 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2398 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2399 CeedBasis basis_c_to_f = NULL; 2400 2401 CeedCall(CeedOperatorCheckReady(op_fine)); 2402 2403 // Build prolongation matrix, if required 2404 if (op_prolong || op_restrict) { 2405 CeedBasis basis_fine; 2406 2407 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2408 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2409 } 2410 2411 // Core code 2412 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2413 return CEED_ERROR_SUCCESS; 2414 } 2415 2416 /** 2417 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2418 2419 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2420 2421 @param[in] op_fine Fine grid operator 2422 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2423 @param[in] rstr_coarse Coarse grid restriction 2424 @param[in] basis_coarse Coarse grid active vector basis 2425 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2426 @param[out] op_coarse Coarse grid operator 2427 @param[out] op_prolong Coarse to fine operator, or NULL 2428 @param[out] op_restrict Fine to coarse operator, or NULL 2429 2430 @return An error code: 0 - success, otherwise - failure 2431 2432 @ref User 2433 **/ 2434 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2435 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2436 CeedOperator *op_restrict) { 2437 Ceed ceed; 2438 CeedInt Q_f, Q_c; 2439 CeedBasis basis_fine, basis_c_to_f = NULL; 2440 2441 CeedCall(CeedOperatorCheckReady(op_fine)); 2442 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2443 2444 // Check for compatible quadrature spaces 2445 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2446 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2447 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2448 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2449 2450 // Create coarse to fine basis, if required 2451 if (op_prolong || op_restrict) { 2452 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2453 CeedScalar *q_ref, *q_weight, *grad; 2454 2455 // Check if interpolation matrix is provided 2456 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2457 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2458 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2459 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2460 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2461 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2462 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2463 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2464 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2465 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2466 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2467 CeedCall(CeedFree(&q_ref)); 2468 CeedCall(CeedFree(&q_weight)); 2469 CeedCall(CeedFree(&grad)); 2470 } 2471 2472 // Core code 2473 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2474 return CEED_ERROR_SUCCESS; 2475 } 2476 2477 /** 2478 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2479 2480 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2481 2482 @param[in] op_fine Fine grid operator 2483 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2484 @param[in] rstr_coarse Coarse grid restriction 2485 @param[in] basis_coarse Coarse grid active vector basis 2486 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2487 @param[out] op_coarse Coarse grid operator 2488 @param[out] op_prolong Coarse to fine operator, or NULL 2489 @param[out] op_restrict Fine to coarse operator, or NULL 2490 2491 @return An error code: 0 - success, otherwise - failure 2492 2493 @ref User 2494 **/ 2495 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2496 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2497 CeedOperator *op_restrict) { 2498 Ceed ceed; 2499 CeedInt Q_f, Q_c; 2500 CeedBasis basis_fine, basis_c_to_f = NULL; 2501 2502 CeedCall(CeedOperatorCheckReady(op_fine)); 2503 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2504 2505 // Check for compatible quadrature spaces 2506 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2507 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2508 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2509 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2510 2511 // Coarse to fine basis 2512 if (op_prolong || op_restrict) { 2513 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2514 CeedScalar *q_ref, *q_weight, *grad; 2515 CeedElemTopology topo; 2516 2517 // Check if interpolation matrix is provided 2518 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2519 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2520 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2521 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2522 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2523 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2524 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2525 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2526 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2527 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2528 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2529 CeedCall(CeedFree(&q_ref)); 2530 CeedCall(CeedFree(&q_weight)); 2531 CeedCall(CeedFree(&grad)); 2532 } 2533 2534 // Core code 2535 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2536 return CEED_ERROR_SUCCESS; 2537 } 2538 2539 /** 2540 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2541 2542 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2543 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2544 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2545 \hat S V\f$. 2546 The CeedOperator must be linear and non-composite. 2547 The associated CeedQFunction must therefore also be linear. 2548 2549 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2550 2551 @param[in] op CeedOperator to create element inverses 2552 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2553 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2554 2555 @return An error code: 0 - success, otherwise - failure 2556 2557 @ref User 2558 **/ 2559 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2560 Ceed ceed, ceed_parent; 2561 bool interp = false, grad = false, is_tensor_basis = true; 2562 CeedInt num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2563 CeedSize l_size = 1; 2564 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg; 2565 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2566 CeedVector q_data; 2567 CeedElemRestriction rstr = NULL, rstr_qd_i; 2568 CeedBasis basis = NULL, fdm_basis; 2569 CeedQFunctionContext ctx_fdm; 2570 CeedQFunctionField *qf_fields; 2571 CeedQFunction qf, qf_fdm; 2572 CeedOperatorField *op_fields; 2573 2574 CeedCall(CeedOperatorCheckReady(op)); 2575 2576 if (op->CreateFDMElementInverse) { 2577 // Backend version 2578 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2579 return CEED_ERROR_SUCCESS; 2580 } else { 2581 // Operator fallback 2582 CeedOperator op_fallback; 2583 2584 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2585 if (op_fallback) { 2586 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2587 return CEED_ERROR_SUCCESS; 2588 } 2589 } 2590 2591 // Default interface implementation 2592 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2593 CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent)); 2594 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2595 2596 // Determine active input basis 2597 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2598 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2599 for (CeedInt i = 0; i < num_input_fields; i++) { 2600 CeedVector vec; 2601 2602 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2603 if (vec == CEED_VECTOR_ACTIVE) { 2604 CeedEvalMode eval_mode; 2605 2606 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2607 interp = interp || eval_mode == CEED_EVAL_INTERP; 2608 grad = grad || eval_mode == CEED_EVAL_GRAD; 2609 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2610 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2611 } 2612 } 2613 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2614 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2615 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2616 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2617 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2618 CeedCall(CeedBasisGetDimension(basis, &dim)); 2619 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2620 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2621 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2622 2623 // Build and diagonalize 1D Mass and Laplacian 2624 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2625 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2626 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2627 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2628 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2629 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2630 CeedCall(CeedCalloc(P_1d, &lambda)); 2631 // -- Build matrices 2632 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2633 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2634 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2635 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2636 2637 // -- Diagonalize 2638 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2639 CeedCall(CeedFree(&mass)); 2640 CeedCall(CeedFree(&laplace)); 2641 for (CeedInt i = 0; i < P_1d; i++) { 2642 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2643 } 2644 CeedCall(CeedFree(&x)); 2645 2646 { 2647 CeedInt layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2648 CeedScalar max_norm = 0; 2649 const CeedScalar *assembled_array, *q_weight_array; 2650 CeedVector assembled = NULL, q_weight; 2651 CeedElemRestriction rstr_qf = NULL; 2652 2653 // Assemble QFunction 2654 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2655 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2656 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2657 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2658 2659 // Calculate element averages 2660 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2661 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2662 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2663 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2664 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2665 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2666 2667 for (CeedInt e = 0; e < num_elem; e++) { 2668 CeedInt count = 0; 2669 2670 for (CeedInt q = 0; q < num_qpts; q++) { 2671 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2672 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2673 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2674 count++; 2675 } 2676 } 2677 } 2678 if (count) { 2679 elem_avg[e] /= count; 2680 } else { 2681 elem_avg[e] = 1.0; 2682 } 2683 } 2684 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2685 CeedCall(CeedVectorDestroy(&assembled)); 2686 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2687 CeedCall(CeedVectorDestroy(&q_weight)); 2688 } 2689 2690 // Build FDM diagonal 2691 { 2692 CeedScalar *q_data_array, *fdm_diagonal; 2693 2694 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2695 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2696 for (CeedInt c = 0; c < num_comp; c++) { 2697 for (CeedInt n = 0; n < num_nodes; n++) { 2698 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2699 if (grad) { 2700 for (CeedInt d = 0; d < dim; d++) { 2701 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2702 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2703 } 2704 } 2705 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2706 } 2707 } 2708 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2709 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2710 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2711 for (CeedInt e = 0; e < num_elem; e++) { 2712 for (CeedInt c = 0; c < num_comp; c++) { 2713 for (CeedInt n = 0; n < num_nodes; n++) 2714 q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2715 } 2716 } 2717 CeedCall(CeedFree(&elem_avg)); 2718 CeedCall(CeedFree(&fdm_diagonal)); 2719 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2720 } 2721 2722 // Setup FDM operator 2723 // -- Basis 2724 { 2725 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2726 2727 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2728 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2729 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2730 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2731 CeedCall(CeedFree(&fdm_interp)); 2732 CeedCall(CeedFree(&grad_dummy)); 2733 CeedCall(CeedFree(&q_ref_dummy)); 2734 CeedCall(CeedFree(&q_weight_dummy)); 2735 CeedCall(CeedFree(&lambda)); 2736 } 2737 2738 // -- Restriction 2739 { 2740 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2741 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2742 } 2743 2744 // -- QFunction 2745 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2746 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2747 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2748 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2749 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2750 2751 // -- QFunction context 2752 { 2753 CeedInt *num_comp_data; 2754 2755 CeedCall(CeedCalloc(1, &num_comp_data)); 2756 num_comp_data[0] = num_comp; 2757 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2758 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2759 } 2760 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2761 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2762 2763 // -- Operator 2764 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2765 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2766 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data)); 2767 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2768 2769 // Cleanup 2770 CeedCall(CeedVectorDestroy(&q_data)); 2771 CeedCall(CeedBasisDestroy(&fdm_basis)); 2772 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2773 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2774 return CEED_ERROR_SUCCESS; 2775 } 2776 2777 /// @} 2778