1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 char *source_path_with_name = NULL; 39 40 // Check if NULL qf passed in 41 if (!qf) return CEED_ERROR_SUCCESS; 42 43 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 44 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 45 46 if (qf->source_path) { 47 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 48 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 49 memcpy(source_path_with_name, qf->source_path, path_len); 50 memcpy(&source_path_with_name[path_len], ":", 1); 51 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 52 } else { 53 CeedCall(CeedCalloc(1, &source_path_with_name)); 54 } 55 56 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 57 { 58 CeedQFunctionContext ctx; 59 60 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 61 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 62 } 63 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 64 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 65 } 66 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 67 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 68 } 69 CeedCall(CeedFree(&source_path_with_name)); 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 Ceed ceed_fallback; 84 bool is_composite; 85 CeedOperator op_fallback; 86 87 // Check not already created 88 if (op->op_fallback) return CEED_ERROR_SUCCESS; 89 90 // Fallback Ceed 91 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 92 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 93 94 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 95 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 96 97 // Clone Op 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 117 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 118 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 119 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 120 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 121 op->input_fields[i]->vec)); 122 } 123 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 124 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 125 op->output_fields[i]->vec)); 126 } 127 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 128 // Cleanup 129 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 130 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 131 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 132 } 133 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 134 CeedCall(CeedOperatorCheckReady(op_fallback)); 135 // Note: No ref-counting here so we don't get caught in a reference loop. 136 // The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback. 137 op->op_fallback = op_fallback; 138 op_fallback->op_fallback_parent = op; 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Select correct basis matrix pointer based on CeedEvalMode 144 145 @param[in] basis CeedBasis from which to get the basis matrix 146 @param[in] eval_mode Current basis evaluation mode 147 @param[in] identity Pointer to identity matrix 148 @param[out] basis_ptr Basis pointer to set 149 150 @ref Developer 151 **/ 152 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 153 switch (eval_mode) { 154 case CEED_EVAL_NONE: 155 *basis_ptr = identity; 156 break; 157 case CEED_EVAL_INTERP: 158 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 159 break; 160 case CEED_EVAL_GRAD: 161 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 162 break; 163 case CEED_EVAL_DIV: 164 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 165 break; 166 case CEED_EVAL_CURL: 167 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 168 break; 169 case CEED_EVAL_WEIGHT: 170 break; // Caught by QF Assembly 171 } 172 assert(*basis_ptr != NULL); 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 181 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 182 @param[out] assembled CeedVector to store assembled diagonal 183 184 @return An error code: 0 - success, otherwise - failure 185 186 @ref Developer 187 **/ 188 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) { 189 Ceed ceed; 190 bool is_composite; 191 192 CeedCall(CeedOperatorGetCeed(op, &ceed)); 193 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 194 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 195 196 // Assemble QFunction 197 CeedInt layout_qf[3]; 198 const CeedScalar *assembled_qf_array; 199 CeedVector assembled_qf = NULL; 200 CeedElemRestriction assembled_elem_rstr = NULL; 201 202 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 203 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 204 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 205 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 206 207 // Get assembly data 208 const CeedEvalMode **eval_modes_in, **eval_modes_out; 209 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 210 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 211 CeedBasis *active_bases_in, *active_bases_out; 212 CeedElemRestriction *active_elem_rstrs_in, *active_elem_rstrs_out; 213 CeedOperatorAssemblyData data; 214 215 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 216 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 217 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, 218 &num_output_components)); 219 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL)); 220 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out)); 221 222 // Loop over all active bases (find matching input/output pairs) 223 for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) { 224 CeedInt b_in, b_out, num_elem, num_nodes, num_qpts, num_comp; 225 bool has_eval_none = false; 226 CeedScalar *elem_diag_array, *identity = NULL; 227 CeedVector elem_diag; 228 CeedElemRestriction diag_elem_rstr; 229 230 if (num_active_bases_in <= num_active_bases_out) { 231 b_in = b; 232 for (b_out = 0; b_out < num_active_bases_out; b_out++) { 233 if (active_bases_in[b_in] == active_bases_out[b_out]) { 234 break; 235 } 236 } 237 if (b_out == num_active_bases_out) { 238 continue; 239 } // No matching output basis found 240 } else { 241 b_out = b; 242 for (b_in = 0; b_in < num_active_bases_in; b_in++) { 243 if (active_bases_in[b_in] == active_bases_out[b_out]) { 244 break; 245 } 246 } 247 if (b_in == num_active_bases_in) { 248 continue; 249 } // No matching output basis found 250 } 251 CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED, 252 "Cannot assemble operator diagonal with different input and output active element restrictions"); 253 254 // Assemble point block diagonal restriction, if needed 255 if (is_point_block) { 256 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 257 } else { 258 CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 259 } 260 261 // Create diagonal vector 262 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 263 264 // Assemble element operator diagonals 265 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 266 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 267 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 268 CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes)); 269 CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp)); 270 if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes; 271 else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts)); 272 273 // Construct identity matrix for basis if required 274 for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) { 275 has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE); 276 } 277 for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) { 278 has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE); 279 } 280 if (has_eval_none) { 281 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 282 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 283 } 284 285 // Compute the diagonal of B^T D B 286 // Each element 287 for (CeedSize e = 0; e < num_elem; e++) { 288 // Each basis eval mode pair 289 CeedInt d_out = 0, q_comp_out; 290 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 291 292 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) { 293 CeedInt d_in = 0, q_comp_in; 294 const CeedScalar *B_t = NULL; 295 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 296 297 CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t)); 298 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out)); 299 if (q_comp_out > 1) { 300 if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0; 301 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 302 } 303 eval_mode_out_prev = eval_modes_out[b_out][e_out]; 304 305 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) { 306 const CeedScalar *B = NULL; 307 308 CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B)); 309 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in)); 310 if (q_comp_in > 1) { 311 if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0; 312 else B = &B[(++d_in) * num_qpts * num_nodes]; 313 } 314 eval_mode_in_prev = eval_modes_in[b_in][e_in]; 315 316 // Each component 317 for (CeedInt c_out = 0; c_out < num_comp; c_out++) { 318 // Each qpt/node pair 319 for (CeedInt q = 0; q < num_qpts; q++) { 320 if (is_point_block) { 321 // Point Block Diagonal 322 for (CeedInt c_in = 0; c_in < num_comp; c_in++) { 323 const CeedSize c_offset = 324 (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 325 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 326 327 for (CeedInt n = 0; n < num_nodes; n++) { 328 elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] += 329 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 330 } 331 } 332 } else { 333 // Diagonal Only 334 const CeedInt c_offset = 335 (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 336 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 337 338 for (CeedInt n = 0; n < num_nodes; n++) { 339 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 340 } 341 } 342 } 343 } 344 } 345 } 346 } 347 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 348 349 // Assemble local operator diagonal 350 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 351 352 // Cleanup 353 CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 354 CeedCall(CeedVectorDestroy(&elem_diag)); 355 CeedCall(CeedFree(&identity)); 356 } 357 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 358 CeedCall(CeedVectorDestroy(&assembled_qf)); 359 return CEED_ERROR_SUCCESS; 360 } 361 362 /** 363 @brief Core logic for assembling composite operator diagonal 364 365 @param[in] op CeedOperator to assemble point block diagonal 366 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 367 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 368 @param[out] assembled CeedVector to store assembled diagonal 369 370 @return An error code: 0 - success, otherwise - failure 371 372 @ref Developer 373 **/ 374 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block, 375 CeedVector assembled) { 376 CeedInt num_sub; 377 CeedOperator *suboperators; 378 379 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 380 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 381 for (CeedInt i = 0; i < num_sub; i++) { 382 if (is_point_block) { 383 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 384 } else { 385 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 386 } 387 } 388 return CEED_ERROR_SUCCESS; 389 } 390 391 /** 392 @brief Build nonzero pattern for non-composite operator 393 394 Users should generally use CeedOperatorLinearAssembleSymbolic() 395 396 @param[in] op CeedOperator to assemble nonzero pattern 397 @param[in] offset Offset for number of entries 398 @param[out] rows Row number for each entry 399 @param[out] cols Column number for each entry 400 401 @return An error code: 0 - success, otherwise - failure 402 403 @ref Developer 404 **/ 405 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 406 Ceed ceed; 407 bool is_composite; 408 CeedSize num_nodes_in, num_nodes_out, count = 0; 409 CeedInt num_elem_in, elem_size_in, num_comp_in, layout_er_in[3]; 410 CeedInt num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries; 411 CeedScalar *array; 412 const CeedScalar *elem_dof_a_in, *elem_dof_a_out; 413 CeedVector index_vec_in, index_vec_out, elem_dof_in, elem_dof_out; 414 CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out; 415 416 CeedCall(CeedOperatorGetCeed(op, &ceed)); 417 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 418 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 419 420 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out)); 421 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 422 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 423 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 424 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 425 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in)); 426 427 // Determine elem_dof relation for input 428 CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in)); 429 CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array)); 430 for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i; 431 CeedCall(CeedVectorRestoreArray(index_vec_in, &array)); 432 CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in)); 433 CeedCall(CeedVectorSetValue(elem_dof_in, 0.0)); 434 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in)); 435 CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE)); 436 CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in)); 437 CeedCall(CeedVectorDestroy(&index_vec_in)); 438 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in)); 439 440 if (elem_rstr_in != elem_rstr_out) { 441 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 442 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 443 "Active input and output operator restrictions must have the same number of elements"); 444 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 445 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 446 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out)); 447 448 // Determine elem_dof relation for output 449 CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out)); 450 CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array)); 451 for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i; 452 CeedCall(CeedVectorRestoreArray(index_vec_out, &array)); 453 CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out)); 454 CeedCall(CeedVectorSetValue(elem_dof_out, 0.0)); 455 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out)); 456 CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE)); 457 CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out)); 458 CeedCall(CeedVectorDestroy(&index_vec_out)); 459 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out)); 460 } else { 461 num_elem_out = num_elem_in; 462 elem_size_out = elem_size_in; 463 num_comp_out = num_comp_in; 464 layout_er_out[0] = layout_er_in[0]; 465 layout_er_out[1] = layout_er_in[1]; 466 layout_er_out[2] = layout_er_in[2]; 467 elem_dof_a_out = elem_dof_a_in; 468 } 469 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 470 471 // Determine i, j locations for element matrices 472 for (CeedInt e = 0; e < num_elem_in; e++) { 473 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 474 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 475 for (CeedInt i = 0; i < elem_size_out; i++) { 476 for (CeedInt j = 0; j < elem_size_in; j++) { 477 const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2]; 478 const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2]; 479 const CeedInt row = elem_dof_a_out[elem_dof_index_row]; 480 const CeedInt col = elem_dof_a_in[elem_dof_index_col]; 481 482 rows[offset + count] = row; 483 cols[offset + count] = col; 484 count++; 485 } 486 } 487 } 488 } 489 } 490 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 491 CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in)); 492 CeedCall(CeedVectorDestroy(&elem_dof_in)); 493 if (elem_rstr_in != elem_rstr_out) { 494 CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out)); 495 CeedCall(CeedVectorDestroy(&elem_dof_out)); 496 } 497 return CEED_ERROR_SUCCESS; 498 } 499 500 /** 501 @brief Assemble nonzero entries for non-composite operator 502 503 Users should generally use CeedOperatorLinearAssemble() 504 505 @param[in] op CeedOperator to assemble 506 @param[in] offset Offset for number of entries 507 @param[out] values Values to assemble into matrix 508 509 @return An error code: 0 - success, otherwise - failure 510 511 @ref Developer 512 **/ 513 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 514 Ceed ceed; 515 bool is_composite; 516 517 CeedCall(CeedOperatorGetCeed(op, &ceed)); 518 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 519 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 520 521 // Early exit for empty operator 522 { 523 CeedInt num_elem = 0; 524 525 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 526 if (num_elem == 0) return CEED_ERROR_SUCCESS; 527 } 528 529 if (op->LinearAssembleSingle) { 530 // Backend version 531 CeedCall(op->LinearAssembleSingle(op, offset, values)); 532 return CEED_ERROR_SUCCESS; 533 } else { 534 // Operator fallback 535 CeedOperator op_fallback; 536 537 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 538 if (op_fallback) { 539 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 540 return CEED_ERROR_SUCCESS; 541 } 542 } 543 544 // Assemble QFunction 545 CeedInt layout_qf[3]; 546 const CeedScalar *assembled_qf_array; 547 CeedVector assembled_qf = NULL; 548 CeedElemRestriction assembled_elem_rstr = NULL; 549 550 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE)); 551 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 552 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 553 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 554 555 // Get assembly data 556 CeedInt num_elem_in, elem_size_in, num_comp_in, num_qpts_in; 557 CeedInt num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries; 558 const CeedEvalMode **eval_modes_in, **eval_modes_out; 559 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 560 CeedBasis *active_bases_in, *active_bases_out, basis_in, basis_out; 561 const CeedScalar **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out; 562 CeedElemRestriction elem_rstr_in, elem_rstr_out; 563 CeedRestrictionType elem_rstr_type_in, elem_rstr_type_out; 564 const bool *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL; 565 const CeedInt8 *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL; 566 CeedOperatorAssemblyData data; 567 568 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 569 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out, 570 &num_eval_modes_out, &eval_modes_out, NULL, NULL)); 571 572 CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED, 573 "Cannot assemble operator with multiple active bases"); 574 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); 575 576 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out)); 577 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 578 basis_in = active_bases_in[0]; 579 basis_out = active_bases_out[0]; 580 B_mat_in = B_mats_in[0]; 581 B_mat_out = B_mats_out[0]; 582 583 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 584 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 585 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 586 if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in; 587 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in)); 588 589 CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in)); 590 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 591 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in)); 592 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 593 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in)); 594 } 595 596 if (elem_rstr_in != elem_rstr_out) { 597 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 598 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 599 "Active input and output operator restrictions must have the same number of elements"); 600 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 601 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 602 if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out; 603 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out)); 604 CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED, 605 "Active input and output bases must have the same number of quadrature points"); 606 607 CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out)); 608 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 609 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out)); 610 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 611 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out)); 612 } 613 } else { 614 num_elem_out = num_elem_in; 615 elem_size_out = elem_size_in; 616 num_comp_out = num_comp_in; 617 num_qpts_out = num_qpts_in; 618 619 elem_rstr_orients_out = elem_rstr_orients_in; 620 elem_rstr_curl_orients_out = elem_rstr_curl_orients_in; 621 } 622 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 623 624 // Loop over elements and put in data structure 625 // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 626 CeedTensorContract contract; 627 CeedSize count = 0; 628 CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL; 629 630 CeedCall(CeedBasisGetTensorContract(basis_in, &contract)); 631 CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat)); 632 CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat)); 633 if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b)); 634 635 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 636 for (CeedSize e = 0; e < num_elem_in; e++) { 637 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 638 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 639 // Compute B^T*D 640 for (CeedSize n = 0; n < elem_size_out; n++) { 641 for (CeedSize q = 0; q < num_qpts_in; q++) { 642 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 643 const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in; 644 CeedScalar sum = 0.0; 645 646 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 647 const CeedSize b_out_index = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n; 648 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out; 649 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 650 651 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 652 } 653 BTD_mat[btd_index] = sum; 654 } 655 } 656 } 657 658 // Form element matrix itself (for each block component) 659 if (contract) { 660 CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE, 661 false, B_mat_in, elem_mat)); 662 } else { 663 CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0])); 664 } 665 666 // Transform the element matrix if required 667 if (elem_rstr_orients_out) { 668 const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out]; 669 670 for (CeedInt i = 0; i < elem_size_out; i++) { 671 const double orient = elem_orients[i] ? -1.0 : 1.0; 672 673 for (CeedInt j = 0; j < elem_size_in; j++) { 674 elem_mat[i * elem_size_in + j] *= orient; 675 } 676 } 677 } else if (elem_rstr_curl_orients_out) { 678 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out]; 679 680 // T^T*(B^T*D*B) 681 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 682 for (CeedInt i = 0; i < elem_size_out; i++) { 683 for (CeedInt j = 0; j < elem_size_in; j++) { 684 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] + 685 (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) + 686 (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0); 687 } 688 } 689 } 690 if (elem_rstr_orients_in) { 691 const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in]; 692 693 for (CeedInt i = 0; i < elem_size_out; i++) { 694 for (CeedInt j = 0; j < elem_size_in; j++) { 695 elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0; 696 } 697 } 698 } else if (elem_rstr_curl_orients_in) { 699 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in]; 700 701 // (B^T*D*B)*T 702 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 703 for (CeedInt i = 0; i < elem_size_out; i++) { 704 for (CeedInt j = 0; j < elem_size_in; j++) { 705 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] + 706 (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) + 707 (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0); 708 } 709 } 710 } 711 712 // Put element matrix in coordinate data structure 713 for (CeedInt i = 0; i < elem_size_out; i++) { 714 for (CeedInt j = 0; j < elem_size_in; j++) { 715 vals[offset + count] = elem_mat[i * elem_size_in + j]; 716 count++; 717 } 718 } 719 } 720 } 721 } 722 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 723 CeedCall(CeedVectorRestoreArray(values, &vals)); 724 725 // Cleanup 726 CeedCall(CeedFree(&BTD_mat)); 727 CeedCall(CeedFree(&elem_mat)); 728 CeedCall(CeedFree(&elem_mat_b)); 729 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 730 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in)); 731 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 732 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in)); 733 } 734 if (elem_rstr_in != elem_rstr_out) { 735 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 736 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out)); 737 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 738 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out)); 739 } 740 } 741 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 742 CeedCall(CeedVectorDestroy(&assembled_qf)); 743 return CEED_ERROR_SUCCESS; 744 } 745 746 /** 747 @brief Count number of entries for assembled CeedOperator 748 749 @param[in] op CeedOperator to assemble 750 @param[out] num_entries Number of entries in assembled representation 751 752 @return An error code: 0 - success, otherwise - failure 753 754 @ref Utility 755 **/ 756 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 757 bool is_composite; 758 CeedInt num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out; 759 CeedElemRestriction rstr_in, rstr_out; 760 761 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 762 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 763 764 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out)); 765 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in)); 766 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in)); 767 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in)); 768 if (rstr_in != rstr_out) { 769 CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out)); 770 CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED, 771 "Active input and output operator restrictions must have the same number of elements"); 772 CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out)); 773 CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out)); 774 } else { 775 num_elem_out = num_elem_in; 776 elem_size_out = elem_size_in; 777 num_comp_out = num_comp_in; 778 } 779 *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in; 780 return CEED_ERROR_SUCCESS; 781 } 782 783 /** 784 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 785 786 @param[in] op_fine Fine grid operator 787 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 788 @param[in] rstr_coarse Coarse grid restriction 789 @param[in] basis_coarse Coarse grid active vector basis 790 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 791 @param[out] op_coarse Coarse grid operator 792 @param[out] op_prolong Coarse to fine operator, or NULL 793 @param[out] op_restrict Fine to coarse operator, or NULL 794 795 @return An error code: 0 - success, otherwise - failure 796 797 @ref Developer 798 **/ 799 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 800 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 801 bool is_composite; 802 Ceed ceed; 803 CeedInt num_comp; 804 CeedVector mult_vec = NULL; 805 CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL; 806 807 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 808 809 // Check for composite operator 810 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 811 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 812 813 // Coarse Grid 814 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 815 // -- Clone input fields 816 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 817 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 818 rstr_fine = op_fine->input_fields[i]->elem_rstr; 819 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 820 } else { 821 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 822 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 823 } 824 } 825 // -- Clone output fields 826 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 827 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 828 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 829 } else { 830 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 831 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 832 } 833 } 834 // -- Clone QFunctionAssemblyData 835 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 836 837 // Multiplicity vector 838 if (op_restrict || op_prolong) { 839 CeedVector mult_e_vec; 840 CeedRestrictionType rstr_type; 841 842 CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type)); 843 CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED, 844 "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported"); 845 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 846 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 847 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 848 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 849 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 850 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 851 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 852 CeedCall(CeedVectorDestroy(&mult_e_vec)); 853 CeedCall(CeedVectorReciprocal(mult_vec)); 854 } 855 856 // Clone name 857 bool has_name = op_fine->name; 858 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 859 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 860 861 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 862 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 863 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 864 865 // Restriction/Prolongation Operators 866 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 867 868 // Restriction 869 if (op_restrict) { 870 CeedInt *num_comp_r_data; 871 CeedQFunctionContext ctx_r; 872 CeedQFunction qf_restrict; 873 874 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 875 CeedCall(CeedCalloc(1, &num_comp_r_data)); 876 num_comp_r_data[0] = num_comp; 877 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 878 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 879 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 880 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 881 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 882 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 883 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 884 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 885 886 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 887 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 888 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 889 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 890 891 // Set name 892 char *restriction_name; 893 894 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 895 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 896 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 897 CeedCall(CeedFree(&restriction_name)); 898 899 // Check 900 CeedCall(CeedOperatorCheckReady(*op_restrict)); 901 902 // Cleanup 903 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 904 } 905 906 // Prolongation 907 if (op_prolong) { 908 CeedInt *num_comp_p_data; 909 CeedQFunctionContext ctx_p; 910 CeedQFunction qf_prolong; 911 912 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 913 CeedCall(CeedCalloc(1, &num_comp_p_data)); 914 num_comp_p_data[0] = num_comp; 915 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 916 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 917 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 918 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 919 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 920 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 921 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 922 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 923 924 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 925 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 926 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 927 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 928 929 // Set name 930 char *prolongation_name; 931 932 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 933 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 934 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 935 CeedCall(CeedFree(&prolongation_name)); 936 937 // Check 938 CeedCall(CeedOperatorCheckReady(*op_prolong)); 939 940 // Cleanup 941 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 942 } 943 944 // Check 945 CeedCall(CeedOperatorCheckReady(*op_coarse)); 946 947 // Cleanup 948 CeedCall(CeedVectorDestroy(&mult_vec)); 949 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 950 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 951 return CEED_ERROR_SUCCESS; 952 } 953 954 /** 955 @brief Build 1D mass matrix and Laplacian with perturbation 956 957 @param[in] interp_1d Interpolation matrix in one dimension 958 @param[in] grad_1d Gradient matrix in one dimension 959 @param[in] q_weight_1d Quadrature weights in one dimension 960 @param[in] P_1d Number of basis nodes in one dimension 961 @param[in] Q_1d Number of quadrature points in one dimension 962 @param[in] dim Dimension of basis 963 @param[out] mass Assembled mass matrix in one dimension 964 @param[out] laplace Assembled perturbed Laplacian in one dimension 965 966 @return An error code: 0 - success, otherwise - failure 967 968 @ref Developer 969 **/ 970 CeedPragmaOptimizeOff 971 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 972 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 973 for (CeedInt i = 0; i < P_1d; i++) { 974 for (CeedInt j = 0; j < P_1d; j++) { 975 CeedScalar sum = 0.0; 976 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 977 mass[i + j * P_1d] = sum; 978 } 979 } 980 // -- Laplacian 981 for (CeedInt i = 0; i < P_1d; i++) { 982 for (CeedInt j = 0; j < P_1d; j++) { 983 CeedScalar sum = 0.0; 984 985 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 986 laplace[i + j * P_1d] = sum; 987 } 988 } 989 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 990 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 991 return CEED_ERROR_SUCCESS; 992 } 993 CeedPragmaOptimizeOn 994 995 /// @} 996 997 /// ---------------------------------------------------------------------------- 998 /// CeedOperator Backend API 999 /// ---------------------------------------------------------------------------- 1000 /// @addtogroup CeedOperatorBackend 1001 /// @{ 1002 1003 /** 1004 @brief Create point block restriction for active operator field 1005 1006 @param[in] rstr Original CeedElemRestriction for active field 1007 @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored 1008 1009 @return An error code: 0 - success, otherwise - failure 1010 1011 @ref Backend 1012 **/ 1013 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) { 1014 Ceed ceed; 1015 CeedInt num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets; 1016 CeedSize l_size; 1017 const CeedInt *offsets; 1018 1019 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 1020 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 1021 1022 // Expand offsets 1023 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 1024 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 1025 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 1026 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 1027 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 1028 shift = num_comp; 1029 if (comp_stride != 1) shift *= num_comp; 1030 CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets)); 1031 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 1032 point_block_offsets[i] = offsets[i] * shift; 1033 } 1034 1035 // Create new restriction 1036 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 1037 point_block_offsets, point_block_rstr)); 1038 1039 // Cleanup 1040 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 1041 return CEED_ERROR_SUCCESS; 1042 } 1043 1044 /** 1045 @brief Create object holding CeedQFunction assembly data for CeedOperator 1046 1047 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1048 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 1049 1050 @return An error code: 0 - success, otherwise - failure 1051 1052 @ref Backend 1053 **/ 1054 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 1055 CeedCall(CeedCalloc(1, data)); 1056 (*data)->ref_count = 1; 1057 (*data)->ceed = ceed; 1058 CeedCall(CeedReference(ceed)); 1059 return CEED_ERROR_SUCCESS; 1060 } 1061 1062 /** 1063 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1064 1065 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 1066 1067 @return An error code: 0 - success, otherwise - failure 1068 1069 @ref Backend 1070 **/ 1071 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1072 data->ref_count++; 1073 return CEED_ERROR_SUCCESS; 1074 } 1075 1076 /** 1077 @brief Set re-use of CeedQFunctionAssemblyData 1078 1079 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 1080 @param[in] reuse_data Boolean flag indicating data re-use 1081 1082 @return An error code: 0 - success, otherwise - failure 1083 1084 @ref Backend 1085 **/ 1086 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 1087 data->reuse_data = reuse_data; 1088 data->needs_data_update = true; 1089 return CEED_ERROR_SUCCESS; 1090 } 1091 1092 /** 1093 @brief Mark QFunctionAssemblyData as stale 1094 1095 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 1096 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 1097 1098 @return An error code: 0 - success, otherwise - failure 1099 1100 @ref Backend 1101 **/ 1102 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 1103 data->needs_data_update = needs_data_update; 1104 return CEED_ERROR_SUCCESS; 1105 } 1106 1107 /** 1108 @brief Determine if QFunctionAssemblyData needs update 1109 1110 @param[in] data CeedQFunctionAssemblyData to mark as stale 1111 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1112 1113 @return An error code: 0 - success, otherwise - failure 1114 1115 @ref Backend 1116 **/ 1117 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1118 *is_update_needed = !data->reuse_data || data->needs_data_update; 1119 return CEED_ERROR_SUCCESS; 1120 } 1121 1122 /** 1123 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1124 1125 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1126 1127 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1128 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1129 CeedQFunctionAssemblyData. 1130 1131 @param[in] data CeedQFunctionAssemblyData to copy reference to 1132 @param[in,out] data_copy Variable to store copied reference 1133 1134 @return An error code: 0 - success, otherwise - failure 1135 1136 @ref Backend 1137 **/ 1138 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1139 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1140 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1141 *data_copy = data; 1142 return CEED_ERROR_SUCCESS; 1143 } 1144 1145 /** 1146 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1147 1148 @param[in] data CeedQFunctionAssemblyData to retrieve status 1149 @param[out] is_setup Boolean flag for setup status 1150 1151 @return An error code: 0 - success, otherwise - failure 1152 1153 @ref Backend 1154 **/ 1155 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1156 *is_setup = data->is_setup; 1157 return CEED_ERROR_SUCCESS; 1158 } 1159 1160 /** 1161 @brief Set internal objects for CeedQFunctionAssemblyData 1162 1163 @param[in,out] data CeedQFunctionAssemblyData to set objects 1164 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1165 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1166 1167 @return An error code: 0 - success, otherwise - failure 1168 1169 @ref Backend 1170 **/ 1171 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1172 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1173 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1174 1175 data->is_setup = true; 1176 return CEED_ERROR_SUCCESS; 1177 } 1178 1179 /** 1180 @brief Get internal objects for CeedQFunctionAssemblyData 1181 1182 @param[in,out] data CeedQFunctionAssemblyData to set objects 1183 @param[out] vec CeedVector to store assembled CeedQFunction at quadrature points 1184 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1185 1186 @return An error code: 0 - success, otherwise - failure 1187 1188 @ref Backend 1189 **/ 1190 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1191 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1192 1193 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1194 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1195 return CEED_ERROR_SUCCESS; 1196 } 1197 1198 /** 1199 @brief Destroy CeedQFunctionAssemblyData 1200 1201 @param[in,out] data CeedQFunctionAssemblyData to destroy 1202 1203 @return An error code: 0 - success, otherwise - failure 1204 1205 @ref Backend 1206 **/ 1207 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1208 if (!*data || --(*data)->ref_count > 0) { 1209 *data = NULL; 1210 return CEED_ERROR_SUCCESS; 1211 } 1212 CeedCall(CeedDestroy(&(*data)->ceed)); 1213 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1214 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1215 1216 CeedCall(CeedFree(data)); 1217 return CEED_ERROR_SUCCESS; 1218 } 1219 1220 /** 1221 @brief Get CeedOperatorAssemblyData 1222 1223 @param[in] op CeedOperator to assemble 1224 @param[out] data CeedQFunctionAssemblyData 1225 1226 @return An error code: 0 - success, otherwise - failure 1227 1228 @ref Backend 1229 **/ 1230 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1231 if (!op->op_assembled) { 1232 CeedOperatorAssemblyData data; 1233 1234 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1235 op->op_assembled = data; 1236 } 1237 *data = op->op_assembled; 1238 return CEED_ERROR_SUCCESS; 1239 } 1240 1241 /** 1242 @brief Create object holding CeedOperator assembly data. 1243 1244 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1245 An array with references to the corresponding active CeedElemRestrictions is also stored. 1246 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1247 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1248 CeedEvalMode. 1249 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1250 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1251 1252 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1253 @param[in] op CeedOperator to be assembled 1254 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1255 1256 @return An error code: 0 - success, otherwise - failure 1257 1258 @ref Backend 1259 **/ 1260 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1261 CeedInt num_active_bases_in = 0, num_active_bases_out = 0, offset = 0; 1262 CeedInt num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL; 1263 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1264 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1265 CeedQFunctionField *qf_fields; 1266 CeedQFunction qf; 1267 CeedOperatorField *op_fields; 1268 bool is_composite; 1269 1270 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1271 CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators."); 1272 1273 // Allocate 1274 CeedCall(CeedCalloc(1, data)); 1275 (*data)->ceed = ceed; 1276 CeedCall(CeedReference(ceed)); 1277 1278 // Build OperatorAssembly data 1279 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1280 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1281 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1282 1283 // Determine active input basis 1284 for (CeedInt i = 0; i < num_input_fields; i++) { 1285 CeedVector vec; 1286 1287 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1288 if (vec == CEED_VECTOR_ACTIVE) { 1289 CeedInt index = -1, num_comp, q_comp; 1290 CeedEvalMode eval_mode; 1291 CeedBasis basis_in = NULL; 1292 1293 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1294 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1295 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1296 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1297 for (CeedInt i = 0; i < num_active_bases_in; i++) { 1298 if ((*data)->active_bases_in[i] == basis_in) index = i; 1299 } 1300 if (index == -1) { 1301 CeedElemRestriction elem_rstr_in; 1302 1303 index = num_active_bases_in; 1304 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in)); 1305 (*data)->active_bases_in[num_active_bases_in] = NULL; 1306 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in])); 1307 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in)); 1308 (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL; 1309 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1310 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in])); 1311 CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in)); 1312 num_eval_modes_in[index] = 0; 1313 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in)); 1314 eval_modes_in[index] = NULL; 1315 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in)); 1316 eval_mode_offsets_in[index] = NULL; 1317 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in)); 1318 (*data)->assembled_bases_in[index] = NULL; 1319 num_active_bases_in++; 1320 } 1321 if (eval_mode != CEED_EVAL_WEIGHT) { 1322 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1323 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1324 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1325 for (CeedInt d = 0; d < q_comp; d++) { 1326 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1327 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1328 offset += num_comp; 1329 } 1330 num_eval_modes_in[index] += q_comp; 1331 } 1332 } 1333 } 1334 1335 // Determine active output basis 1336 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1337 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1338 offset = 0; 1339 for (CeedInt i = 0; i < num_output_fields; i++) { 1340 CeedVector vec; 1341 1342 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1343 if (vec == CEED_VECTOR_ACTIVE) { 1344 CeedInt index = -1, num_comp, q_comp; 1345 CeedEvalMode eval_mode; 1346 CeedBasis basis_out = NULL; 1347 1348 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1349 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1350 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1351 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1352 for (CeedInt i = 0; i < num_active_bases_out; i++) { 1353 if ((*data)->active_bases_out[i] == basis_out) index = i; 1354 } 1355 if (index == -1) { 1356 CeedElemRestriction elem_rstr_out; 1357 1358 index = num_active_bases_out; 1359 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out)); 1360 (*data)->active_bases_out[num_active_bases_out] = NULL; 1361 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out])); 1362 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out)); 1363 (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL; 1364 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1365 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out])); 1366 CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out)); 1367 num_eval_modes_out[index] = 0; 1368 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out)); 1369 eval_modes_out[index] = NULL; 1370 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out)); 1371 eval_mode_offsets_out[index] = NULL; 1372 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out)); 1373 (*data)->assembled_bases_out[index] = NULL; 1374 num_active_bases_out++; 1375 } 1376 if (eval_mode != CEED_EVAL_WEIGHT) { 1377 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1378 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1379 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1380 for (CeedInt d = 0; d < q_comp; d++) { 1381 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1382 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1383 offset += num_comp; 1384 } 1385 num_eval_modes_out[index] += q_comp; 1386 } 1387 } 1388 } 1389 (*data)->num_active_bases_in = num_active_bases_in; 1390 (*data)->num_eval_modes_in = num_eval_modes_in; 1391 (*data)->eval_modes_in = eval_modes_in; 1392 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1393 (*data)->num_active_bases_out = num_active_bases_out; 1394 (*data)->num_eval_modes_out = num_eval_modes_out; 1395 (*data)->eval_modes_out = eval_modes_out; 1396 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1397 (*data)->num_output_components = offset; 1398 return CEED_ERROR_SUCCESS; 1399 } 1400 1401 /** 1402 @brief Get CeedOperator CeedEvalModes for assembly. 1403 1404 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1405 1406 @param[in] data CeedOperatorAssemblyData 1407 @param[out] num_active_bases_in Total number of active bases for input 1408 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1409 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1410 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1411 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1412 @param[out] num_active_bases_out Total number of active bases for output 1413 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1414 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1415 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1416 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1417 including contributions of all active bases 1418 1419 @return An error code: 0 - success, otherwise - failure 1420 1421 @ref Backend 1422 **/ 1423 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in, 1424 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out, 1425 CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, 1426 CeedSize *num_output_components) { 1427 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1428 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1429 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1430 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1431 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1432 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1433 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1434 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1435 if (num_output_components) *num_output_components = data->num_output_components; 1436 return CEED_ERROR_SUCCESS; 1437 } 1438 1439 /** 1440 @brief Get CeedOperator CeedBasis data for assembly. 1441 1442 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1443 1444 @param[in] data CeedOperatorAssemblyData 1445 @param[out] num_active_bases_in Number of active input bases, or NULL 1446 @param[out] active_bases_in Pointer to hold active input CeedBasis, or NULL 1447 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1448 @param[out] num_active_bases_out Number of active output bases, or NULL 1449 @param[out] active_bases_out Pointer to hold active output CeedBasis, or NULL 1450 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1451 1452 @return An error code: 0 - success, otherwise - failure 1453 1454 @ref Backend 1455 **/ 1456 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in, 1457 const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out, 1458 const CeedScalar ***assembled_bases_out) { 1459 // Assemble B_in, B_out if needed 1460 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1461 CeedInt num_qpts; 1462 1463 if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts)); 1464 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts)); 1465 for (CeedInt b = 0; b < data->num_active_bases_in; b++) { 1466 bool has_eval_none = false; 1467 CeedInt num_nodes; 1468 CeedScalar *B_in = NULL, *identity = NULL; 1469 1470 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes)); 1471 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1472 1473 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1474 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1475 } 1476 if (has_eval_none) { 1477 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1478 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1479 identity[i * num_nodes + i] = 1.0; 1480 } 1481 } 1482 1483 for (CeedInt q = 0; q < num_qpts; q++) { 1484 for (CeedInt n = 0; n < num_nodes; n++) { 1485 CeedInt d_in = 0, q_comp_in; 1486 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1487 1488 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1489 const CeedInt qq = data->num_eval_modes_in[b] * q; 1490 const CeedScalar *B = NULL; 1491 1492 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B)); 1493 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1494 if (q_comp_in > 1) { 1495 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1496 else B = &B[(++d_in) * num_qpts * num_nodes]; 1497 } 1498 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1499 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1500 } 1501 } 1502 } 1503 if (identity) CeedCall(CeedFree(&identity)); 1504 data->assembled_bases_in[b] = B_in; 1505 } 1506 } 1507 1508 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1509 CeedInt num_qpts; 1510 1511 if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts)); 1512 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts)); 1513 for (CeedInt b = 0; b < data->num_active_bases_out; b++) { 1514 bool has_eval_none = false; 1515 CeedInt num_nodes; 1516 CeedScalar *B_out = NULL, *identity = NULL; 1517 1518 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes)); 1519 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1520 1521 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1522 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1523 } 1524 if (has_eval_none) { 1525 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1526 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1527 identity[i * num_nodes + i] = 1.0; 1528 } 1529 } 1530 1531 for (CeedInt q = 0; q < num_qpts; q++) { 1532 for (CeedInt n = 0; n < num_nodes; n++) { 1533 CeedInt d_out = 0, q_comp_out; 1534 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1535 1536 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1537 const CeedInt qq = data->num_eval_modes_out[b] * q; 1538 const CeedScalar *B = NULL; 1539 1540 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B)); 1541 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1542 if (q_comp_out > 1) { 1543 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1544 else B = &B[(++d_out) * num_qpts * num_nodes]; 1545 } 1546 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1547 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1548 } 1549 } 1550 } 1551 if (identity) CeedCall(CeedFree(&identity)); 1552 data->assembled_bases_out[b] = B_out; 1553 } 1554 } 1555 1556 // Pass out assembled data 1557 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1558 if (active_bases_in) *active_bases_in = data->active_bases_in; 1559 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1560 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1561 if (active_bases_out) *active_bases_out = data->active_bases_out; 1562 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1563 return CEED_ERROR_SUCCESS; 1564 } 1565 1566 /** 1567 @brief Get CeedOperator CeedBasis data for assembly. 1568 1569 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1570 1571 @param[in] data CeedOperatorAssemblyData 1572 @param[out] num_active_elem_rstrs_in Number of active input element restrictions, or NULL 1573 @param[out] active_elem_rstrs_in Pointer to hold active input CeedElemRestrictions, or NULL 1574 @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL 1575 @param[out] active_elem_rstrs_out Pointer to hold active output CeedElemRestrictions, or NULL 1576 1577 @return An error code: 0 - success, otherwise - failure 1578 1579 @ref Backend 1580 **/ 1581 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in, 1582 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out, 1583 CeedElemRestriction **active_elem_rstrs_out) { 1584 if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in; 1585 if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in; 1586 if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out; 1587 if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out; 1588 return CEED_ERROR_SUCCESS; 1589 } 1590 1591 /** 1592 @brief Destroy CeedOperatorAssemblyData 1593 1594 @param[in,out] data CeedOperatorAssemblyData to destroy 1595 1596 @return An error code: 0 - success, otherwise - failure 1597 1598 @ref Backend 1599 **/ 1600 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1601 if (!*data) { 1602 *data = NULL; 1603 return CEED_ERROR_SUCCESS; 1604 } 1605 CeedCall(CeedDestroy(&(*data)->ceed)); 1606 for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) { 1607 CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b])); 1608 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b])); 1609 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1610 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1611 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1612 } 1613 for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) { 1614 CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b])); 1615 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b])); 1616 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1617 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1618 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1619 } 1620 CeedCall(CeedFree(&(*data)->active_bases_in)); 1621 CeedCall(CeedFree(&(*data)->active_bases_out)); 1622 CeedCall(CeedFree(&(*data)->active_elem_rstrs_in)); 1623 CeedCall(CeedFree(&(*data)->active_elem_rstrs_out)); 1624 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1625 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1626 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1627 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1628 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1629 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1630 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1631 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1632 1633 CeedCall(CeedFree(data)); 1634 return CEED_ERROR_SUCCESS; 1635 } 1636 1637 /** 1638 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 1639 1640 @param[in] op CeedOperator to retrieve fallback for 1641 @param[out] op_fallback Fallback CeedOperator 1642 1643 @return An error code: 0 - success, otherwise - failure 1644 1645 @ref Backend 1646 **/ 1647 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 1648 // Create if needed 1649 if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op)); 1650 if (op->op_fallback) { 1651 bool is_debug; 1652 1653 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 1654 if (is_debug) { 1655 Ceed ceed, ceed_fallback; 1656 const char *resource, *resource_fallback; 1657 1658 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1659 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 1660 CeedCall(CeedGetResource(ceed, &resource)); 1661 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 1662 1663 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 1664 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 1665 op->op_fallback); 1666 } 1667 } 1668 *op_fallback = op->op_fallback; 1669 return CEED_ERROR_SUCCESS; 1670 } 1671 1672 /** 1673 @brief Get the parent CeedOperator for a fallback CeedOperator 1674 1675 @param[in] op CeedOperator context 1676 @param[out] parent Variable to store parent CeedOperator context 1677 1678 @return An error code: 0 - success, otherwise - failure 1679 1680 @ref Backend 1681 **/ 1682 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) { 1683 *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL; 1684 return CEED_ERROR_SUCCESS; 1685 } 1686 1687 /** 1688 @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator 1689 1690 @param[in] op CeedOperator context 1691 @param[out] parent Variable to store parent Ceed context 1692 1693 @return An error code: 0 - success, otherwise - failure 1694 1695 @ref Backend 1696 **/ 1697 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) { 1698 *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed; 1699 return CEED_ERROR_SUCCESS; 1700 } 1701 1702 /// @} 1703 1704 /// ---------------------------------------------------------------------------- 1705 /// CeedOperator Public API 1706 /// ---------------------------------------------------------------------------- 1707 /// @addtogroup CeedOperatorUser 1708 /// @{ 1709 1710 /** 1711 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1712 1713 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1714 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1715 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1716 1717 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1718 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1719 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1720 1721 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1722 1723 @param[in] op CeedOperator to assemble CeedQFunction 1724 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1725 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1726 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1727 1728 @return An error code: 0 - success, otherwise - failure 1729 1730 @ref User 1731 **/ 1732 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1733 CeedCall(CeedOperatorCheckReady(op)); 1734 1735 if (op->LinearAssembleQFunction) { 1736 // Backend version 1737 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1738 } else { 1739 // Operator fallback 1740 CeedOperator op_fallback; 1741 1742 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1743 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1744 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1745 } 1746 return CEED_ERROR_SUCCESS; 1747 } 1748 1749 /** 1750 @brief Assemble CeedQFunction and store result internally. 1751 1752 Return copied references of stored data to the caller. 1753 Caller is responsible for ownership and destruction of the copied references. 1754 See also @ref CeedOperatorLinearAssembleQFunction 1755 1756 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1757 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1758 1759 @param[in] op CeedOperator to assemble CeedQFunction 1760 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1761 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1762 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1763 1764 @return An error code: 0 - success, otherwise - failure 1765 1766 @ref User 1767 **/ 1768 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1769 int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL; 1770 CeedOperator op_assemble = NULL; 1771 CeedOperator op_fallback_parent = NULL; 1772 1773 CeedCall(CeedOperatorCheckReady(op)); 1774 1775 // Determine if fallback parent or operator has implementation 1776 CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent)); 1777 if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) { 1778 // -- Backend version for op fallback parent is faster, if it exists 1779 LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate; 1780 op_assemble = op_fallback_parent; 1781 } else if (op->LinearAssembleQFunctionUpdate) { 1782 // -- Backend version for op 1783 LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate; 1784 op_assemble = op; 1785 } 1786 1787 // Assemble QFunction 1788 if (LinearAssembleQFunctionUpdate) { 1789 // Backend or fallback parent version 1790 bool qf_assembled_is_setup; 1791 CeedVector assembled_vec = NULL; 1792 CeedElemRestriction assembled_rstr = NULL; 1793 1794 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1795 if (qf_assembled_is_setup) { 1796 bool update_needed; 1797 1798 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1799 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1800 if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request)); 1801 } else { 1802 CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request)); 1803 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1804 } 1805 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1806 1807 // Copy reference from internally held copy 1808 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1809 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1810 CeedCall(CeedVectorDestroy(&assembled_vec)); 1811 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1812 } else { 1813 // Operator fallback 1814 CeedOperator op_fallback; 1815 1816 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1817 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1818 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1819 } 1820 return CEED_ERROR_SUCCESS; 1821 } 1822 1823 /** 1824 @brief Assemble the diagonal of a square linear CeedOperator 1825 1826 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1827 1828 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1829 1830 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1831 1832 @param[in] op CeedOperator to assemble CeedQFunction 1833 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1834 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1835 1836 @return An error code: 0 - success, otherwise - failure 1837 1838 @ref User 1839 **/ 1840 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1841 bool is_composite; 1842 CeedSize input_size = 0, output_size = 0; 1843 1844 CeedCall(CeedOperatorCheckReady(op)); 1845 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1846 1847 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1848 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1849 1850 // Early exit for empty operator 1851 if (!is_composite) { 1852 CeedInt num_elem = 0; 1853 1854 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1855 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1856 } 1857 1858 if (op->LinearAssembleDiagonal) { 1859 // Backend version 1860 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1861 return CEED_ERROR_SUCCESS; 1862 } else if (op->LinearAssembleAddDiagonal) { 1863 // Backend version with zeroing first 1864 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1865 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1866 return CEED_ERROR_SUCCESS; 1867 } else { 1868 // Operator fallback 1869 CeedOperator op_fallback; 1870 1871 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1872 if (op_fallback) { 1873 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1874 return CEED_ERROR_SUCCESS; 1875 } 1876 } 1877 // Default interface implementation 1878 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1879 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1880 return CEED_ERROR_SUCCESS; 1881 } 1882 1883 /** 1884 @brief Assemble the diagonal of a square linear CeedOperator 1885 1886 This sums into a CeedVector the diagonal of a linear CeedOperator. 1887 1888 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1889 1890 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1891 1892 @param[in] op CeedOperator to assemble CeedQFunction 1893 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1894 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1895 1896 @return An error code: 0 - success, otherwise - failure 1897 1898 @ref User 1899 **/ 1900 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1901 bool is_composite; 1902 CeedSize input_size = 0, output_size = 0; 1903 1904 CeedCall(CeedOperatorCheckReady(op)); 1905 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1906 1907 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1908 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1909 1910 // Early exit for empty operator 1911 if (!is_composite) { 1912 CeedInt num_elem = 0; 1913 1914 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1915 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1916 } 1917 1918 if (op->LinearAssembleAddDiagonal) { 1919 // Backend version 1920 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1921 return CEED_ERROR_SUCCESS; 1922 } else { 1923 // Operator fallback 1924 CeedOperator op_fallback; 1925 1926 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1927 if (op_fallback) { 1928 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1929 return CEED_ERROR_SUCCESS; 1930 } 1931 } 1932 // Default interface implementation 1933 if (is_composite) { 1934 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1935 } else { 1936 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1937 } 1938 return CEED_ERROR_SUCCESS; 1939 } 1940 1941 /** 1942 @brief Fully assemble the point-block diagonal pattern of a linear operator. 1943 1944 Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal(). 1945 1946 The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the 1947 matrix in entry (i, j). 1948 Note that the (i, j) pairs are unique. 1949 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in 1950 the same ordering. 1951 1952 This will generally be slow unless your operator is low-order. 1953 1954 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1955 1956 @param[in] op CeedOperator to assemble 1957 @param[out] num_entries Number of entries in coordinate nonzero pattern 1958 @param[out] rows Row number for each entry 1959 @param[out] cols Column number for each entry 1960 1961 @ref User 1962 **/ 1963 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1964 Ceed ceed; 1965 bool is_composite; 1966 CeedInt num_active_components, num_sub_operators; 1967 CeedOperator *sub_operators; 1968 1969 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1970 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1971 1972 CeedSize input_size = 0, output_size = 0; 1973 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1974 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1975 1976 if (is_composite) { 1977 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators)); 1978 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1979 } else { 1980 sub_operators = &op; 1981 num_sub_operators = 1; 1982 } 1983 1984 // Verify operator can be assembled correctly 1985 { 1986 CeedOperatorAssemblyData data; 1987 CeedInt num_active_elem_rstrs, comp_stride; 1988 CeedElemRestriction *active_elem_rstrs; 1989 1990 // Get initial values to check against 1991 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data)); 1992 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 1993 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride)); 1994 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components)); 1995 1996 // Verify that all active element restrictions have same component stride and number of components 1997 for (CeedInt k = 0; k < num_sub_operators; k++) { 1998 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data)); 1999 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 2000 for (CeedInt i = 0; i < num_active_elem_rstrs; i++) { 2001 CeedInt comp_stride_sub, num_active_components_sub; 2002 2003 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub)); 2004 CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION, 2005 "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub); 2006 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub)); 2007 CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE, 2008 "All suboperators must have the same number of output components"); 2009 } 2010 } 2011 } 2012 *num_entries = input_size * num_active_components; 2013 CeedCall(CeedCalloc(*num_entries, rows)); 2014 CeedCall(CeedCalloc(*num_entries, cols)); 2015 2016 for (CeedInt o = 0; o < num_sub_operators; o++) { 2017 CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr; 2018 CeedInt comp_stride, num_elem, elem_size; 2019 const CeedInt *offsets, *point_block_offsets; 2020 2021 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr)); 2022 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride)); 2023 CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem)); 2024 CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size)); 2025 CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets)); 2026 2027 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr)); 2028 CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets)); 2029 2030 for (CeedSize i = 0; i < num_elem * elem_size; i++) { 2031 for (CeedInt c_out = 0; c_out < num_active_components; c_out++) { 2032 for (CeedInt c_in = 0; c_in < num_active_components; c_in++) { 2033 (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride; 2034 (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride; 2035 } 2036 } 2037 } 2038 2039 CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets)); 2040 CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets)); 2041 CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr)); 2042 } 2043 return CEED_ERROR_SUCCESS; 2044 } 2045 2046 /** 2047 @brief Assemble the point block diagonal of a square linear CeedOperator 2048 2049 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 2050 2051 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2052 2053 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2054 2055 @param[in] op CeedOperator to assemble CeedQFunction 2056 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2057 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2058 component in]. 2059 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2060 2061 @return An error code: 0 - success, otherwise - failure 2062 2063 @ref User 2064 **/ 2065 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2066 bool is_composite; 2067 CeedSize input_size = 0, output_size = 0; 2068 2069 CeedCall(CeedOperatorCheckReady(op)); 2070 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2071 2072 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2073 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2074 2075 // Early exit for empty operator 2076 if (!is_composite) { 2077 CeedInt num_elem = 0; 2078 2079 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2080 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2081 } 2082 2083 if (op->LinearAssemblePointBlockDiagonal) { 2084 // Backend version 2085 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 2086 return CEED_ERROR_SUCCESS; 2087 } else if (op->LinearAssembleAddPointBlockDiagonal) { 2088 // Backend version with zeroing first 2089 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2090 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2091 return CEED_ERROR_SUCCESS; 2092 } else { 2093 // Operator fallback 2094 CeedOperator op_fallback; 2095 2096 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2097 if (op_fallback) { 2098 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 2099 return CEED_ERROR_SUCCESS; 2100 } 2101 } 2102 // Default interface implementation 2103 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2104 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2105 return CEED_ERROR_SUCCESS; 2106 } 2107 2108 /** 2109 @brief Assemble the point block diagonal of a square linear CeedOperator 2110 2111 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 2112 2113 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2114 2115 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2116 2117 @param[in] op CeedOperator to assemble CeedQFunction 2118 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2119 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2120 component in]. 2121 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2122 2123 @return An error code: 0 - success, otherwise - failure 2124 2125 @ref User 2126 **/ 2127 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2128 bool is_composite; 2129 CeedSize input_size = 0, output_size = 0; 2130 2131 CeedCall(CeedOperatorCheckReady(op)); 2132 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2133 2134 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2135 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2136 2137 // Early exit for empty operator 2138 if (!is_composite) { 2139 CeedInt num_elem = 0; 2140 2141 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2142 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2143 } 2144 2145 if (op->LinearAssembleAddPointBlockDiagonal) { 2146 // Backend version 2147 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2148 return CEED_ERROR_SUCCESS; 2149 } else { 2150 // Operator fallback 2151 CeedOperator op_fallback; 2152 2153 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2154 if (op_fallback) { 2155 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 2156 return CEED_ERROR_SUCCESS; 2157 } 2158 } 2159 // Default interface implementation 2160 if (is_composite) { 2161 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 2162 } else { 2163 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 2164 } 2165 return CEED_ERROR_SUCCESS; 2166 } 2167 2168 /** 2169 @brief Fully assemble the nonzero pattern of a linear operator. 2170 2171 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 2172 2173 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2174 matrix in entry (i, j). 2175 Note that the (i, j) pairs are not unique and may repeat. 2176 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 2177 2178 This will generally be slow unless your operator is low-order. 2179 2180 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2181 2182 @param[in] op CeedOperator to assemble 2183 @param[out] num_entries Number of entries in coordinate nonzero pattern 2184 @param[out] rows Row number for each entry 2185 @param[out] cols Column number for each entry 2186 2187 @ref User 2188 **/ 2189 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 2190 bool is_composite; 2191 CeedInt num_suboperators, offset = 0; 2192 CeedSize single_entries; 2193 CeedOperator *sub_operators; 2194 2195 CeedCall(CeedOperatorCheckReady(op)); 2196 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2197 2198 if (op->LinearAssembleSymbolic) { 2199 // Backend version 2200 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 2201 return CEED_ERROR_SUCCESS; 2202 } else { 2203 // Operator fallback 2204 CeedOperator op_fallback; 2205 2206 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2207 if (op_fallback) { 2208 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 2209 return CEED_ERROR_SUCCESS; 2210 } 2211 } 2212 2213 // Default interface implementation 2214 2215 // Count entries and allocate rows, cols arrays 2216 *num_entries = 0; 2217 if (is_composite) { 2218 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2219 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2220 for (CeedInt k = 0; k < num_suboperators; ++k) { 2221 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2222 *num_entries += single_entries; 2223 } 2224 } else { 2225 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 2226 *num_entries += single_entries; 2227 } 2228 CeedCall(CeedCalloc(*num_entries, rows)); 2229 CeedCall(CeedCalloc(*num_entries, cols)); 2230 2231 // Assemble nonzero locations 2232 if (is_composite) { 2233 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2234 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2235 for (CeedInt k = 0; k < num_suboperators; ++k) { 2236 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 2237 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2238 offset += single_entries; 2239 } 2240 } else { 2241 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 2242 } 2243 return CEED_ERROR_SUCCESS; 2244 } 2245 2246 /** 2247 @brief Fully assemble the nonzero entries of a linear operator. 2248 2249 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 2250 2251 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2252 matrix in entry (i, j). 2253 Note that the (i, j) pairs are not unique and may repeat. 2254 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 2255 2256 This will generally be slow unless your operator is low-order. 2257 2258 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2259 2260 @param[in] op CeedOperator to assemble 2261 @param[out] values Values to assemble into matrix 2262 2263 @ref User 2264 **/ 2265 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 2266 bool is_composite; 2267 CeedInt num_suboperators, offset = 0; 2268 CeedSize single_entries = 0; 2269 CeedOperator *sub_operators; 2270 2271 CeedCall(CeedOperatorCheckReady(op)); 2272 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2273 2274 // Early exit for empty operator 2275 if (!is_composite) { 2276 CeedInt num_elem = 0; 2277 2278 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2279 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2280 } 2281 2282 if (op->LinearAssemble) { 2283 // Backend version 2284 CeedCall(op->LinearAssemble(op, values)); 2285 return CEED_ERROR_SUCCESS; 2286 } else { 2287 // Operator fallback 2288 CeedOperator op_fallback; 2289 2290 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2291 if (op_fallback) { 2292 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2293 return CEED_ERROR_SUCCESS; 2294 } 2295 } 2296 2297 // Default interface implementation 2298 CeedCall(CeedVectorSetValue(values, 0.0)); 2299 if (is_composite) { 2300 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2301 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2302 for (CeedInt k = 0; k < num_suboperators; k++) { 2303 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2304 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2305 offset += single_entries; 2306 } 2307 } else { 2308 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2309 } 2310 return CEED_ERROR_SUCCESS; 2311 } 2312 2313 /** 2314 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2315 2316 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2317 2318 @param[in] op Composite CeedOperator 2319 @param[in] num_skip_indices Number of suboperators to skip 2320 @param[in] skip_indices Array of indices of suboperators to skip 2321 @param[out] mult Vector to store multiplicity (of size l_size) 2322 2323 @return An error code: 0 - success, otherwise - failure 2324 2325 @ref User 2326 **/ 2327 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2328 Ceed ceed; 2329 CeedInt num_suboperators; 2330 CeedSize l_vec_len; 2331 CeedScalar *mult_array; 2332 CeedVector ones_l_vec; 2333 CeedElemRestriction elem_rstr, mult_elem_rstr; 2334 CeedOperator *sub_operators; 2335 2336 CeedCall(CeedOperatorCheckReady(op)); 2337 2338 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2339 2340 // Zero mult vector 2341 CeedCall(CeedVectorSetValue(mult, 0.0)); 2342 2343 // Get suboperators 2344 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2345 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2346 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2347 2348 // Work vector 2349 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2350 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2351 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2352 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2353 2354 // Compute multiplicity across suboperators 2355 for (CeedInt i = 0; i < num_suboperators; i++) { 2356 const CeedScalar *sub_mult_array; 2357 CeedVector sub_mult_l_vec, ones_e_vec; 2358 2359 // -- Check for suboperator to skip 2360 for (CeedInt j = 0; j < num_skip_indices; j++) { 2361 if (skip_indices[j] == i) continue; 2362 } 2363 2364 // -- Sub operator multiplicity 2365 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2366 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); 2367 CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2368 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2369 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2370 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2371 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2372 // ---- Flag every node present in the current suboperator 2373 for (CeedInt j = 0; j < l_vec_len; j++) { 2374 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2375 } 2376 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2377 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2378 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2379 CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr)); 2380 } 2381 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2382 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2383 return CEED_ERROR_SUCCESS; 2384 } 2385 2386 /** 2387 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2388 grid interpolation 2389 2390 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2391 2392 @param[in] op_fine Fine grid operator 2393 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2394 @param[in] rstr_coarse Coarse grid restriction 2395 @param[in] basis_coarse Coarse grid active vector basis 2396 @param[out] op_coarse Coarse grid operator 2397 @param[out] op_prolong Coarse to fine operator, or NULL 2398 @param[out] op_restrict Fine to coarse operator, or NULL 2399 2400 @return An error code: 0 - success, otherwise - failure 2401 2402 @ref User 2403 **/ 2404 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2405 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2406 CeedBasis basis_c_to_f = NULL; 2407 2408 CeedCall(CeedOperatorCheckReady(op_fine)); 2409 2410 // Build prolongation matrix, if required 2411 if (op_prolong || op_restrict) { 2412 CeedBasis basis_fine; 2413 2414 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2415 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2416 } 2417 2418 // Core code 2419 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2420 return CEED_ERROR_SUCCESS; 2421 } 2422 2423 /** 2424 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2425 2426 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2427 2428 @param[in] op_fine Fine grid operator 2429 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2430 @param[in] rstr_coarse Coarse grid restriction 2431 @param[in] basis_coarse Coarse grid active vector basis 2432 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2433 @param[out] op_coarse Coarse grid operator 2434 @param[out] op_prolong Coarse to fine operator, or NULL 2435 @param[out] op_restrict Fine to coarse operator, or NULL 2436 2437 @return An error code: 0 - success, otherwise - failure 2438 2439 @ref User 2440 **/ 2441 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2442 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2443 CeedOperator *op_restrict) { 2444 Ceed ceed; 2445 CeedInt Q_f, Q_c; 2446 CeedBasis basis_fine, basis_c_to_f = NULL; 2447 2448 CeedCall(CeedOperatorCheckReady(op_fine)); 2449 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2450 2451 // Check for compatible quadrature spaces 2452 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2453 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2454 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2455 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2456 2457 // Create coarse to fine basis, if required 2458 if (op_prolong || op_restrict) { 2459 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2460 CeedScalar *q_ref, *q_weight, *grad; 2461 2462 // Check if interpolation matrix is provided 2463 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2464 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2465 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2466 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2467 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2468 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2469 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2470 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2471 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2472 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2473 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2474 CeedCall(CeedFree(&q_ref)); 2475 CeedCall(CeedFree(&q_weight)); 2476 CeedCall(CeedFree(&grad)); 2477 } 2478 2479 // Core code 2480 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2481 return CEED_ERROR_SUCCESS; 2482 } 2483 2484 /** 2485 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2486 2487 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2488 2489 @param[in] op_fine Fine grid operator 2490 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2491 @param[in] rstr_coarse Coarse grid restriction 2492 @param[in] basis_coarse Coarse grid active vector basis 2493 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2494 @param[out] op_coarse Coarse grid operator 2495 @param[out] op_prolong Coarse to fine operator, or NULL 2496 @param[out] op_restrict Fine to coarse operator, or NULL 2497 2498 @return An error code: 0 - success, otherwise - failure 2499 2500 @ref User 2501 **/ 2502 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2503 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2504 CeedOperator *op_restrict) { 2505 Ceed ceed; 2506 CeedInt Q_f, Q_c; 2507 CeedBasis basis_fine, basis_c_to_f = NULL; 2508 2509 CeedCall(CeedOperatorCheckReady(op_fine)); 2510 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2511 2512 // Check for compatible quadrature spaces 2513 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2514 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2515 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2516 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2517 2518 // Coarse to fine basis 2519 if (op_prolong || op_restrict) { 2520 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2521 CeedScalar *q_ref, *q_weight, *grad; 2522 CeedElemTopology topo; 2523 2524 // Check if interpolation matrix is provided 2525 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2526 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2527 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2528 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2529 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2530 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2531 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2532 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2533 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2534 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2535 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2536 CeedCall(CeedFree(&q_ref)); 2537 CeedCall(CeedFree(&q_weight)); 2538 CeedCall(CeedFree(&grad)); 2539 } 2540 2541 // Core code 2542 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2543 return CEED_ERROR_SUCCESS; 2544 } 2545 2546 /** 2547 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2548 2549 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2550 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2551 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2552 \hat S V\f$. 2553 The CeedOperator must be linear and non-composite. 2554 The associated CeedQFunction must therefore also be linear. 2555 2556 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2557 2558 @param[in] op CeedOperator to create element inverses 2559 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2560 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2561 2562 @return An error code: 0 - success, otherwise - failure 2563 2564 @ref User 2565 **/ 2566 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2567 Ceed ceed, ceed_parent; 2568 bool interp = false, grad = false, is_tensor_basis = true; 2569 CeedInt num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2570 CeedSize l_size = 1; 2571 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg; 2572 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2573 CeedVector q_data; 2574 CeedElemRestriction rstr = NULL, rstr_qd_i; 2575 CeedBasis basis = NULL, fdm_basis; 2576 CeedQFunctionContext ctx_fdm; 2577 CeedQFunctionField *qf_fields; 2578 CeedQFunction qf, qf_fdm; 2579 CeedOperatorField *op_fields; 2580 2581 CeedCall(CeedOperatorCheckReady(op)); 2582 2583 if (op->CreateFDMElementInverse) { 2584 // Backend version 2585 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2586 return CEED_ERROR_SUCCESS; 2587 } else { 2588 // Operator fallback 2589 CeedOperator op_fallback; 2590 2591 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2592 if (op_fallback) { 2593 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2594 return CEED_ERROR_SUCCESS; 2595 } 2596 } 2597 2598 // Default interface implementation 2599 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2600 CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent)); 2601 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2602 2603 // Determine active input basis 2604 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2605 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2606 for (CeedInt i = 0; i < num_input_fields; i++) { 2607 CeedVector vec; 2608 2609 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2610 if (vec == CEED_VECTOR_ACTIVE) { 2611 CeedEvalMode eval_mode; 2612 2613 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2614 interp = interp || eval_mode == CEED_EVAL_INTERP; 2615 grad = grad || eval_mode == CEED_EVAL_GRAD; 2616 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2617 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2618 } 2619 } 2620 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2621 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2622 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2623 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2624 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2625 CeedCall(CeedBasisGetDimension(basis, &dim)); 2626 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2627 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2628 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2629 2630 // Build and diagonalize 1D Mass and Laplacian 2631 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2632 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2633 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2634 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2635 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2636 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2637 CeedCall(CeedCalloc(P_1d, &lambda)); 2638 // -- Build matrices 2639 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2640 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2641 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2642 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2643 2644 // -- Diagonalize 2645 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2646 CeedCall(CeedFree(&mass)); 2647 CeedCall(CeedFree(&laplace)); 2648 for (CeedInt i = 0; i < P_1d; i++) { 2649 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2650 } 2651 CeedCall(CeedFree(&x)); 2652 2653 { 2654 CeedInt layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2655 CeedScalar max_norm = 0; 2656 const CeedScalar *assembled_array, *q_weight_array; 2657 CeedVector assembled = NULL, q_weight; 2658 CeedElemRestriction rstr_qf = NULL; 2659 2660 // Assemble QFunction 2661 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2662 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2663 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2664 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2665 2666 // Calculate element averages 2667 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2668 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2669 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2670 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2671 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2672 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2673 2674 for (CeedInt e = 0; e < num_elem; e++) { 2675 CeedInt count = 0; 2676 2677 for (CeedInt q = 0; q < num_qpts; q++) { 2678 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2679 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2680 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2681 count++; 2682 } 2683 } 2684 } 2685 if (count) { 2686 elem_avg[e] /= count; 2687 } else { 2688 elem_avg[e] = 1.0; 2689 } 2690 } 2691 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2692 CeedCall(CeedVectorDestroy(&assembled)); 2693 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2694 CeedCall(CeedVectorDestroy(&q_weight)); 2695 } 2696 2697 // Build FDM diagonal 2698 { 2699 CeedScalar *q_data_array, *fdm_diagonal; 2700 2701 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2702 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2703 for (CeedInt c = 0; c < num_comp; c++) { 2704 for (CeedInt n = 0; n < num_nodes; n++) { 2705 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2706 if (grad) { 2707 for (CeedInt d = 0; d < dim; d++) { 2708 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2709 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2710 } 2711 } 2712 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2713 } 2714 } 2715 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2716 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2717 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2718 for (CeedInt e = 0; e < num_elem; e++) { 2719 for (CeedInt c = 0; c < num_comp; c++) { 2720 for (CeedInt n = 0; n < num_nodes; n++) 2721 q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2722 } 2723 } 2724 CeedCall(CeedFree(&elem_avg)); 2725 CeedCall(CeedFree(&fdm_diagonal)); 2726 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2727 } 2728 2729 // Setup FDM operator 2730 // -- Basis 2731 { 2732 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2733 2734 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2735 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2736 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2737 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2738 CeedCall(CeedFree(&fdm_interp)); 2739 CeedCall(CeedFree(&grad_dummy)); 2740 CeedCall(CeedFree(&q_ref_dummy)); 2741 CeedCall(CeedFree(&q_weight_dummy)); 2742 CeedCall(CeedFree(&lambda)); 2743 } 2744 2745 // -- Restriction 2746 { 2747 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2748 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2749 } 2750 2751 // -- QFunction 2752 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2753 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2754 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2755 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2756 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2757 2758 // -- QFunction context 2759 { 2760 CeedInt *num_comp_data; 2761 2762 CeedCall(CeedCalloc(1, &num_comp_data)); 2763 num_comp_data[0] = num_comp; 2764 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2765 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2766 } 2767 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2768 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2769 2770 // -- Operator 2771 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2772 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2773 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data)); 2774 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2775 2776 // Cleanup 2777 CeedCall(CeedVectorDestroy(&q_data)); 2778 CeedCall(CeedBasisDestroy(&fdm_basis)); 2779 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2780 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2781 return CEED_ERROR_SUCCESS; 2782 } 2783 2784 /// @} 2785