1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 char *source_path_with_name = NULL; 39 40 // Check if NULL qf passed in 41 if (!qf) return CEED_ERROR_SUCCESS; 42 43 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 44 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 45 46 if (qf->source_path) { 47 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 48 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 49 memcpy(source_path_with_name, qf->source_path, path_len); 50 memcpy(&source_path_with_name[path_len], ":", 1); 51 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 52 } else { 53 CeedCall(CeedCalloc(1, &source_path_with_name)); 54 } 55 56 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 57 { 58 CeedQFunctionContext ctx; 59 60 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 61 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 62 } 63 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 64 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 65 } 66 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 67 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 68 } 69 CeedCall(CeedFree(&source_path_with_name)); 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 Ceed ceed_fallback; 84 bool is_composite; 85 CeedOperator op_fallback; 86 87 // Check not already created 88 if (op->op_fallback) return CEED_ERROR_SUCCESS; 89 90 // Fallback Ceed 91 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 92 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 93 94 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 95 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 96 97 // Clone Op 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 117 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 118 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 119 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 120 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 121 op->input_fields[i]->vec)); 122 } 123 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 124 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 125 op->output_fields[i]->vec)); 126 } 127 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 128 // Cleanup 129 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 130 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 131 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 132 } 133 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 134 CeedCall(CeedOperatorCheckReady(op_fallback)); 135 // Note: No ref-counting here so we don't get caught in a reference loop. 136 // The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback. 137 op->op_fallback = op_fallback; 138 op_fallback->op_fallback_parent = op; 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Select correct basis matrix pointer based on CeedEvalMode 144 145 @param[in] basis CeedBasis from which to get the basis matrix 146 @param[in] eval_mode Current basis evaluation mode 147 @param[in] identity Pointer to identity matrix 148 @param[out] basis_ptr Basis pointer to set 149 150 @ref Developer 151 **/ 152 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 153 switch (eval_mode) { 154 case CEED_EVAL_NONE: 155 *basis_ptr = identity; 156 break; 157 case CEED_EVAL_INTERP: 158 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 159 break; 160 case CEED_EVAL_GRAD: 161 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 162 break; 163 case CEED_EVAL_DIV: 164 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 165 break; 166 case CEED_EVAL_CURL: 167 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 168 break; 169 case CEED_EVAL_WEIGHT: 170 break; // Caught by QF Assembly 171 } 172 assert(*basis_ptr != NULL); 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 181 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 182 @param[out] assembled CeedVector to store assembled diagonal 183 184 @return An error code: 0 - success, otherwise - failure 185 186 @ref Developer 187 **/ 188 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) { 189 Ceed ceed; 190 bool is_composite; 191 192 CeedCall(CeedOperatorGetCeed(op, &ceed)); 193 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 194 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 195 196 // Assemble QFunction 197 CeedInt layout_qf[3]; 198 const CeedScalar *assembled_qf_array; 199 CeedVector assembled_qf = NULL; 200 CeedElemRestriction assembled_elem_rstr = NULL; 201 202 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 203 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 204 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 205 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 206 207 // Get assembly data 208 const CeedEvalMode **eval_modes_in, **eval_modes_out; 209 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 210 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 211 CeedBasis *active_bases_in, *active_bases_out; 212 CeedElemRestriction *active_elem_rstrs_in, *active_elem_rstrs_out; 213 CeedOperatorAssemblyData data; 214 215 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 216 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 217 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, 218 &num_output_components)); 219 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL)); 220 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out)); 221 222 // Loop over all active bases (find matching input/output pairs) 223 for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) { 224 CeedInt b_in, b_out, num_elem, num_nodes, num_qpts, num_comp; 225 bool has_eval_none = false; 226 CeedScalar *elem_diag_array, *identity = NULL; 227 CeedVector elem_diag; 228 CeedElemRestriction diag_elem_rstr; 229 230 if (num_active_bases_in <= num_active_bases_out) { 231 b_in = b; 232 for (b_out = 0; b_out < num_active_bases_out; b_out++) { 233 if (active_bases_in[b_in] == active_bases_out[b_out]) { 234 break; 235 } 236 } 237 if (b_out == num_active_bases_out) { 238 continue; 239 } // No matching output basis found 240 } else { 241 b_out = b; 242 for (b_in = 0; b_in < num_active_bases_in; b_in++) { 243 if (active_bases_in[b_in] == active_bases_out[b_out]) { 244 break; 245 } 246 } 247 if (b_in == num_active_bases_in) { 248 continue; 249 } // No matching output basis found 250 } 251 CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED, 252 "Cannot assemble operator diagonal with different input and output active element restrictions"); 253 254 // Assemble point block diagonal restriction, if needed 255 if (is_point_block) { 256 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 257 } else { 258 CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 259 } 260 261 // Create diagonal vector 262 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 263 264 // Assemble element operator diagonals 265 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 266 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 267 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 268 CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes)); 269 CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp)); 270 if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes; 271 else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts)); 272 273 // Construct identity matrix for basis if required 274 for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) { 275 has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE); 276 } 277 for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) { 278 has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE); 279 } 280 if (has_eval_none) { 281 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 282 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 283 } 284 285 // Compute the diagonal of B^T D B 286 // Each element 287 for (CeedSize e = 0; e < num_elem; e++) { 288 // Each basis eval mode pair 289 CeedInt d_out = 0, q_comp_out; 290 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 291 292 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) { 293 CeedInt d_in = 0, q_comp_in; 294 const CeedScalar *B_t = NULL; 295 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 296 297 CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t)); 298 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out)); 299 if (q_comp_out > 1) { 300 if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0; 301 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 302 } 303 eval_mode_out_prev = eval_modes_out[b_out][e_out]; 304 305 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) { 306 const CeedScalar *B = NULL; 307 308 CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B)); 309 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in)); 310 if (q_comp_in > 1) { 311 if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0; 312 else B = &B[(++d_in) * num_qpts * num_nodes]; 313 } 314 eval_mode_in_prev = eval_modes_in[b_in][e_in]; 315 316 // Each component 317 for (CeedInt c_out = 0; c_out < num_comp; c_out++) { 318 // Each qpt/node pair 319 for (CeedInt q = 0; q < num_qpts; q++) { 320 if (is_point_block) { 321 // Point Block Diagonal 322 for (CeedInt c_in = 0; c_in < num_comp; c_in++) { 323 const CeedSize c_offset = 324 (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 325 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 326 327 for (CeedInt n = 0; n < num_nodes; n++) { 328 elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] += 329 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 330 } 331 } 332 } else { 333 // Diagonal Only 334 const CeedInt c_offset = 335 (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 336 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 337 338 for (CeedInt n = 0; n < num_nodes; n++) { 339 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 340 } 341 } 342 } 343 } 344 } 345 } 346 } 347 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 348 349 // Assemble local operator diagonal 350 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 351 352 // Cleanup 353 CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 354 CeedCall(CeedVectorDestroy(&elem_diag)); 355 CeedCall(CeedFree(&identity)); 356 } 357 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 358 CeedCall(CeedVectorDestroy(&assembled_qf)); 359 return CEED_ERROR_SUCCESS; 360 } 361 362 /** 363 @brief Core logic for assembling composite operator diagonal 364 365 @param[in] op CeedOperator to assemble point block diagonal 366 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 367 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 368 @param[out] assembled CeedVector to store assembled diagonal 369 370 @return An error code: 0 - success, otherwise - failure 371 372 @ref Developer 373 **/ 374 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block, 375 CeedVector assembled) { 376 CeedInt num_sub; 377 CeedOperator *suboperators; 378 379 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 380 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 381 for (CeedInt i = 0; i < num_sub; i++) { 382 if (is_point_block) { 383 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 384 } else { 385 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 386 } 387 } 388 return CEED_ERROR_SUCCESS; 389 } 390 391 /** 392 @brief Build nonzero pattern for non-composite operator 393 394 Users should generally use CeedOperatorLinearAssembleSymbolic() 395 396 @param[in] op CeedOperator to assemble nonzero pattern 397 @param[in] offset Offset for number of entries 398 @param[out] rows Row number for each entry 399 @param[out] cols Column number for each entry 400 401 @return An error code: 0 - success, otherwise - failure 402 403 @ref Developer 404 **/ 405 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 406 Ceed ceed; 407 bool is_composite; 408 CeedSize num_nodes_in, num_nodes_out, count = 0; 409 CeedInt num_elem_in, elem_size_in, num_comp_in, layout_er_in[3]; 410 CeedInt num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries; 411 CeedScalar *array; 412 const CeedScalar *elem_dof_a_in, *elem_dof_a_out; 413 CeedVector index_vec_in, index_vec_out, elem_dof_in, elem_dof_out; 414 CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out; 415 416 CeedCall(CeedOperatorGetCeed(op, &ceed)); 417 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 418 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 419 420 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out)); 421 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 422 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 423 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 424 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 425 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in)); 426 427 // Determine elem_dof relation for input 428 CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in)); 429 CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array)); 430 for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i; 431 CeedCall(CeedVectorRestoreArray(index_vec_in, &array)); 432 CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in)); 433 CeedCall(CeedVectorSetValue(elem_dof_in, 0.0)); 434 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in)); 435 CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE)); 436 CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in)); 437 CeedCall(CeedVectorDestroy(&index_vec_in)); 438 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in)); 439 440 if (elem_rstr_in != elem_rstr_out) { 441 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 442 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 443 "Active input and output operator restrictions must have the same number of elements"); 444 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 445 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 446 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out)); 447 448 // Determine elem_dof relation for output 449 CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out)); 450 CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array)); 451 for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i; 452 CeedCall(CeedVectorRestoreArray(index_vec_out, &array)); 453 CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out)); 454 CeedCall(CeedVectorSetValue(elem_dof_out, 0.0)); 455 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out)); 456 CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE)); 457 CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out)); 458 CeedCall(CeedVectorDestroy(&index_vec_out)); 459 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out)); 460 } else { 461 num_elem_out = num_elem_in; 462 elem_size_out = elem_size_in; 463 num_comp_out = num_comp_in; 464 layout_er_out[0] = layout_er_in[0]; 465 layout_er_out[1] = layout_er_in[1]; 466 layout_er_out[2] = layout_er_in[2]; 467 elem_dof_a_out = elem_dof_a_in; 468 } 469 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 470 471 // Determine i, j locations for element matrices 472 for (CeedInt e = 0; e < num_elem_in; e++) { 473 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 474 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 475 for (CeedInt i = 0; i < elem_size_out; i++) { 476 for (CeedInt j = 0; j < elem_size_in; j++) { 477 const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2]; 478 const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2]; 479 const CeedInt row = elem_dof_a_out[elem_dof_index_row]; 480 const CeedInt col = elem_dof_a_in[elem_dof_index_col]; 481 482 rows[offset + count] = row; 483 cols[offset + count] = col; 484 count++; 485 } 486 } 487 } 488 } 489 } 490 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 491 CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in)); 492 CeedCall(CeedVectorDestroy(&elem_dof_in)); 493 if (elem_rstr_in != elem_rstr_out) { 494 CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out)); 495 CeedCall(CeedVectorDestroy(&elem_dof_out)); 496 } 497 return CEED_ERROR_SUCCESS; 498 } 499 500 /** 501 @brief Assemble nonzero entries for non-composite operator 502 503 Users should generally use CeedOperatorLinearAssemble() 504 505 @param[in] op CeedOperator to assemble 506 @param[in] offset Offset for number of entries 507 @param[out] values Values to assemble into matrix 508 509 @return An error code: 0 - success, otherwise - failure 510 511 @ref Developer 512 **/ 513 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 514 Ceed ceed; 515 bool is_composite; 516 517 CeedCall(CeedOperatorGetCeed(op, &ceed)); 518 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 519 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 520 521 // Early exit for empty operator 522 { 523 CeedInt num_elem = 0; 524 525 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 526 if (num_elem == 0) return CEED_ERROR_SUCCESS; 527 } 528 529 if (op->LinearAssembleSingle) { 530 // Backend version 531 CeedCall(op->LinearAssembleSingle(op, offset, values)); 532 return CEED_ERROR_SUCCESS; 533 } else { 534 // Operator fallback 535 CeedOperator op_fallback; 536 537 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 538 if (op_fallback) { 539 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 540 return CEED_ERROR_SUCCESS; 541 } 542 } 543 544 // Assemble QFunction 545 CeedInt layout_qf[3]; 546 const CeedScalar *assembled_qf_array; 547 CeedVector assembled_qf = NULL; 548 CeedElemRestriction assembled_elem_rstr = NULL; 549 550 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE)); 551 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 552 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 553 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 554 555 // Get assembly data 556 CeedInt num_elem_in, elem_size_in, num_comp_in, num_qpts_in; 557 CeedInt num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries; 558 const CeedEvalMode **eval_modes_in, **eval_modes_out; 559 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 560 CeedBasis *active_bases_in, *active_bases_out, basis_in, basis_out; 561 const CeedScalar **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out; 562 CeedElemRestriction elem_rstr_in, elem_rstr_out; 563 CeedRestrictionType elem_rstr_type_in, elem_rstr_type_out; 564 const bool *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL; 565 const CeedInt8 *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL; 566 CeedOperatorAssemblyData data; 567 568 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 569 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out, 570 &num_eval_modes_out, &eval_modes_out, NULL, NULL)); 571 572 CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED, 573 "Cannot assemble operator with multiple active bases"); 574 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); 575 576 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out)); 577 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 578 basis_in = active_bases_in[0]; 579 basis_out = active_bases_out[0]; 580 B_mat_in = B_mats_in[0]; 581 B_mat_out = B_mats_out[0]; 582 583 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 584 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 585 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 586 if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in; 587 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in)); 588 589 CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in)); 590 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 591 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in)); 592 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 593 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in)); 594 } 595 596 if (elem_rstr_in != elem_rstr_out) { 597 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 598 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 599 "Active input and output operator restrictions must have the same number of elements"); 600 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 601 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 602 if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out; 603 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out)); 604 CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED, 605 "Active input and output bases must have the same number of quadrature points"); 606 607 CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out)); 608 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 609 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out)); 610 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 611 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out)); 612 } 613 } else { 614 num_elem_out = num_elem_in; 615 elem_size_out = elem_size_in; 616 num_comp_out = num_comp_in; 617 num_qpts_out = num_qpts_in; 618 619 elem_rstr_orients_out = elem_rstr_orients_in; 620 elem_rstr_curl_orients_out = elem_rstr_curl_orients_in; 621 } 622 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 623 624 // Loop over elements and put in data structure 625 // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 626 CeedSize count = 0; 627 CeedTensorContract contract; 628 CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL; 629 630 CeedCall(CeedTensorContractCreate(ceed, &contract)); 631 CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat)); 632 CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat)); 633 if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b)); 634 635 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 636 for (CeedSize e = 0; e < num_elem_in; e++) { 637 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 638 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 639 // Compute B^T*D 640 for (CeedSize n = 0; n < elem_size_out; n++) { 641 for (CeedSize q = 0; q < num_qpts_in; q++) { 642 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 643 const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in; 644 CeedScalar sum = 0.0; 645 646 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 647 const CeedSize b_out_index = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n; 648 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out; 649 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 650 651 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 652 } 653 BTD_mat[btd_index] = sum; 654 } 655 } 656 } 657 658 // Form element matrix itself (for each block component) 659 CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE, 660 false, B_mat_in, elem_mat)); 661 662 // Transform the element matrix if required 663 if (elem_rstr_orients_out) { 664 const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out]; 665 666 for (CeedInt i = 0; i < elem_size_out; i++) { 667 const double orient = elem_orients[i] ? -1.0 : 1.0; 668 669 for (CeedInt j = 0; j < elem_size_in; j++) { 670 elem_mat[i * elem_size_in + j] *= orient; 671 } 672 } 673 } else if (elem_rstr_curl_orients_out) { 674 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out]; 675 676 // T^T*(B^T*D*B) 677 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 678 for (CeedInt i = 0; i < elem_size_out; i++) { 679 for (CeedInt j = 0; j < elem_size_in; j++) { 680 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] + 681 (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) + 682 (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0); 683 } 684 } 685 } 686 if (elem_rstr_orients_in) { 687 const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in]; 688 689 for (CeedInt i = 0; i < elem_size_out; i++) { 690 for (CeedInt j = 0; j < elem_size_in; j++) { 691 elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0; 692 } 693 } 694 } else if (elem_rstr_curl_orients_in) { 695 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in]; 696 697 // (B^T*D*B)*T 698 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 699 for (CeedInt i = 0; i < elem_size_out; i++) { 700 for (CeedInt j = 0; j < elem_size_in; j++) { 701 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] + 702 (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) + 703 (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0); 704 } 705 } 706 } 707 708 // Put element matrix in coordinate data structure 709 for (CeedInt i = 0; i < elem_size_out; i++) { 710 for (CeedInt j = 0; j < elem_size_in; j++) { 711 vals[offset + count] = elem_mat[i * elem_size_in + j]; 712 count++; 713 } 714 } 715 } 716 } 717 } 718 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 719 CeedCall(CeedVectorRestoreArray(values, &vals)); 720 721 // Cleanup 722 CeedCall(CeedTensorContractDestroy(&contract)); 723 CeedCall(CeedFree(&BTD_mat)); 724 CeedCall(CeedFree(&elem_mat)); 725 CeedCall(CeedFree(&elem_mat_b)); 726 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 727 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in)); 728 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 729 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in)); 730 } 731 if (elem_rstr_in != elem_rstr_out) { 732 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 733 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out)); 734 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 735 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out)); 736 } 737 } 738 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 739 CeedCall(CeedVectorDestroy(&assembled_qf)); 740 return CEED_ERROR_SUCCESS; 741 } 742 743 /** 744 @brief Count number of entries for assembled CeedOperator 745 746 @param[in] op CeedOperator to assemble 747 @param[out] num_entries Number of entries in assembled representation 748 749 @return An error code: 0 - success, otherwise - failure 750 751 @ref Utility 752 **/ 753 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 754 bool is_composite; 755 CeedInt num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out; 756 CeedElemRestriction rstr_in, rstr_out; 757 758 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 759 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 760 761 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out)); 762 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in)); 763 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in)); 764 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in)); 765 if (rstr_in != rstr_out) { 766 CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out)); 767 CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED, 768 "Active input and output operator restrictions must have the same number of elements"); 769 CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out)); 770 CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out)); 771 } else { 772 num_elem_out = num_elem_in; 773 elem_size_out = elem_size_in; 774 num_comp_out = num_comp_in; 775 } 776 *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in; 777 return CEED_ERROR_SUCCESS; 778 } 779 780 /** 781 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 782 783 @param[in] op_fine Fine grid operator 784 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 785 @param[in] rstr_coarse Coarse grid restriction 786 @param[in] basis_coarse Coarse grid active vector basis 787 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 788 @param[out] op_coarse Coarse grid operator 789 @param[out] op_prolong Coarse to fine operator, or NULL 790 @param[out] op_restrict Fine to coarse operator, or NULL 791 792 @return An error code: 0 - success, otherwise - failure 793 794 @ref Developer 795 **/ 796 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 797 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 798 bool is_composite; 799 Ceed ceed; 800 CeedInt num_comp; 801 CeedVector mult_vec = NULL; 802 CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL; 803 804 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 805 806 // Check for composite operator 807 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 808 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 809 810 // Coarse Grid 811 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 812 // -- Clone input fields 813 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 814 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 815 rstr_fine = op_fine->input_fields[i]->elem_rstr; 816 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 817 } else { 818 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 819 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 820 } 821 } 822 // -- Clone output fields 823 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 824 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 825 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 826 } else { 827 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 828 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 829 } 830 } 831 // -- Clone QFunctionAssemblyData 832 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 833 834 // Multiplicity vector 835 if (op_restrict || op_prolong) { 836 CeedVector mult_e_vec; 837 CeedRestrictionType rstr_type; 838 839 CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type)); 840 CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED, 841 "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported"); 842 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 843 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 844 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 845 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 846 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 847 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 848 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 849 CeedCall(CeedVectorDestroy(&mult_e_vec)); 850 CeedCall(CeedVectorReciprocal(mult_vec)); 851 } 852 853 // Clone name 854 bool has_name = op_fine->name; 855 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 856 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 857 858 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 859 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 860 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 861 862 // Restriction/Prolongation Operators 863 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 864 865 // Restriction 866 if (op_restrict) { 867 CeedInt *num_comp_r_data; 868 CeedQFunctionContext ctx_r; 869 CeedQFunction qf_restrict; 870 871 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 872 CeedCall(CeedCalloc(1, &num_comp_r_data)); 873 num_comp_r_data[0] = num_comp; 874 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 875 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 876 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 877 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 878 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 879 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 880 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 881 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 882 883 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 884 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 885 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 886 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 887 888 // Set name 889 char *restriction_name; 890 891 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 892 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 893 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 894 CeedCall(CeedFree(&restriction_name)); 895 896 // Check 897 CeedCall(CeedOperatorCheckReady(*op_restrict)); 898 899 // Cleanup 900 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 901 } 902 903 // Prolongation 904 if (op_prolong) { 905 CeedInt *num_comp_p_data; 906 CeedQFunctionContext ctx_p; 907 CeedQFunction qf_prolong; 908 909 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 910 CeedCall(CeedCalloc(1, &num_comp_p_data)); 911 num_comp_p_data[0] = num_comp; 912 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 913 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 914 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 915 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 916 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 917 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 918 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 919 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 920 921 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 922 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 923 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 924 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 925 926 // Set name 927 char *prolongation_name; 928 929 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 930 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 931 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 932 CeedCall(CeedFree(&prolongation_name)); 933 934 // Check 935 CeedCall(CeedOperatorCheckReady(*op_prolong)); 936 937 // Cleanup 938 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 939 } 940 941 // Check 942 CeedCall(CeedOperatorCheckReady(*op_coarse)); 943 944 // Cleanup 945 CeedCall(CeedVectorDestroy(&mult_vec)); 946 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 947 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 948 return CEED_ERROR_SUCCESS; 949 } 950 951 /** 952 @brief Build 1D mass matrix and Laplacian with perturbation 953 954 @param[in] interp_1d Interpolation matrix in one dimension 955 @param[in] grad_1d Gradient matrix in one dimension 956 @param[in] q_weight_1d Quadrature weights in one dimension 957 @param[in] P_1d Number of basis nodes in one dimension 958 @param[in] Q_1d Number of quadrature points in one dimension 959 @param[in] dim Dimension of basis 960 @param[out] mass Assembled mass matrix in one dimension 961 @param[out] laplace Assembled perturbed Laplacian in one dimension 962 963 @return An error code: 0 - success, otherwise - failure 964 965 @ref Developer 966 **/ 967 CeedPragmaOptimizeOff 968 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 969 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 970 for (CeedInt i = 0; i < P_1d; i++) { 971 for (CeedInt j = 0; j < P_1d; j++) { 972 CeedScalar sum = 0.0; 973 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 974 mass[i + j * P_1d] = sum; 975 } 976 } 977 // -- Laplacian 978 for (CeedInt i = 0; i < P_1d; i++) { 979 for (CeedInt j = 0; j < P_1d; j++) { 980 CeedScalar sum = 0.0; 981 982 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 983 laplace[i + j * P_1d] = sum; 984 } 985 } 986 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 987 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 988 return CEED_ERROR_SUCCESS; 989 } 990 CeedPragmaOptimizeOn 991 992 /// @} 993 994 /// ---------------------------------------------------------------------------- 995 /// CeedOperator Backend API 996 /// ---------------------------------------------------------------------------- 997 /// @addtogroup CeedOperatorBackend 998 /// @{ 999 1000 /** 1001 @brief Create point block restriction for active operator field 1002 1003 @param[in] rstr Original CeedElemRestriction for active field 1004 @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored 1005 1006 @return An error code: 0 - success, otherwise - failure 1007 1008 @ref Backend 1009 **/ 1010 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) { 1011 Ceed ceed; 1012 CeedInt num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets; 1013 CeedSize l_size; 1014 const CeedInt *offsets; 1015 1016 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 1017 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 1018 1019 // Expand offsets 1020 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 1021 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 1022 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 1023 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 1024 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 1025 shift = num_comp; 1026 if (comp_stride != 1) shift *= num_comp; 1027 CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets)); 1028 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 1029 point_block_offsets[i] = offsets[i] * shift; 1030 } 1031 1032 // Create new restriction 1033 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 1034 point_block_offsets, point_block_rstr)); 1035 1036 // Cleanup 1037 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 1038 return CEED_ERROR_SUCCESS; 1039 } 1040 1041 /** 1042 @brief Create object holding CeedQFunction assembly data for CeedOperator 1043 1044 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1045 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 1046 1047 @return An error code: 0 - success, otherwise - failure 1048 1049 @ref Backend 1050 **/ 1051 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 1052 CeedCall(CeedCalloc(1, data)); 1053 (*data)->ref_count = 1; 1054 (*data)->ceed = ceed; 1055 CeedCall(CeedReference(ceed)); 1056 return CEED_ERROR_SUCCESS; 1057 } 1058 1059 /** 1060 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1061 1062 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 1063 1064 @return An error code: 0 - success, otherwise - failure 1065 1066 @ref Backend 1067 **/ 1068 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1069 data->ref_count++; 1070 return CEED_ERROR_SUCCESS; 1071 } 1072 1073 /** 1074 @brief Set re-use of CeedQFunctionAssemblyData 1075 1076 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 1077 @param[in] reuse_data Boolean flag indicating data re-use 1078 1079 @return An error code: 0 - success, otherwise - failure 1080 1081 @ref Backend 1082 **/ 1083 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 1084 data->reuse_data = reuse_data; 1085 data->needs_data_update = true; 1086 return CEED_ERROR_SUCCESS; 1087 } 1088 1089 /** 1090 @brief Mark QFunctionAssemblyData as stale 1091 1092 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 1093 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 1094 1095 @return An error code: 0 - success, otherwise - failure 1096 1097 @ref Backend 1098 **/ 1099 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 1100 data->needs_data_update = needs_data_update; 1101 return CEED_ERROR_SUCCESS; 1102 } 1103 1104 /** 1105 @brief Determine if QFunctionAssemblyData needs update 1106 1107 @param[in] data CeedQFunctionAssemblyData to mark as stale 1108 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1109 1110 @return An error code: 0 - success, otherwise - failure 1111 1112 @ref Backend 1113 **/ 1114 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1115 *is_update_needed = !data->reuse_data || data->needs_data_update; 1116 return CEED_ERROR_SUCCESS; 1117 } 1118 1119 /** 1120 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1121 1122 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1123 1124 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1125 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1126 CeedQFunctionAssemblyData. 1127 1128 @param[in] data CeedQFunctionAssemblyData to copy reference to 1129 @param[in,out] data_copy Variable to store copied reference 1130 1131 @return An error code: 0 - success, otherwise - failure 1132 1133 @ref Backend 1134 **/ 1135 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1136 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1137 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1138 *data_copy = data; 1139 return CEED_ERROR_SUCCESS; 1140 } 1141 1142 /** 1143 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1144 1145 @param[in] data CeedQFunctionAssemblyData to retrieve status 1146 @param[out] is_setup Boolean flag for setup status 1147 1148 @return An error code: 0 - success, otherwise - failure 1149 1150 @ref Backend 1151 **/ 1152 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1153 *is_setup = data->is_setup; 1154 return CEED_ERROR_SUCCESS; 1155 } 1156 1157 /** 1158 @brief Set internal objects for CeedQFunctionAssemblyData 1159 1160 @param[in,out] data CeedQFunctionAssemblyData to set objects 1161 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1162 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1163 1164 @return An error code: 0 - success, otherwise - failure 1165 1166 @ref Backend 1167 **/ 1168 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1169 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1170 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1171 1172 data->is_setup = true; 1173 return CEED_ERROR_SUCCESS; 1174 } 1175 1176 /** 1177 @brief Get internal objects for CeedQFunctionAssemblyData 1178 1179 @param[in,out] data CeedQFunctionAssemblyData to set objects 1180 @param[out] vec CeedVector to store assembled CeedQFunction at quadrature points 1181 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1182 1183 @return An error code: 0 - success, otherwise - failure 1184 1185 @ref Backend 1186 **/ 1187 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1188 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1189 1190 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1191 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1192 return CEED_ERROR_SUCCESS; 1193 } 1194 1195 /** 1196 @brief Destroy CeedQFunctionAssemblyData 1197 1198 @param[in,out] data CeedQFunctionAssemblyData to destroy 1199 1200 @return An error code: 0 - success, otherwise - failure 1201 1202 @ref Backend 1203 **/ 1204 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1205 if (!*data || --(*data)->ref_count > 0) { 1206 *data = NULL; 1207 return CEED_ERROR_SUCCESS; 1208 } 1209 CeedCall(CeedDestroy(&(*data)->ceed)); 1210 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1211 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1212 1213 CeedCall(CeedFree(data)); 1214 return CEED_ERROR_SUCCESS; 1215 } 1216 1217 /** 1218 @brief Get CeedOperatorAssemblyData 1219 1220 @param[in] op CeedOperator to assemble 1221 @param[out] data CeedQFunctionAssemblyData 1222 1223 @return An error code: 0 - success, otherwise - failure 1224 1225 @ref Backend 1226 **/ 1227 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1228 if (!op->op_assembled) { 1229 CeedOperatorAssemblyData data; 1230 1231 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1232 op->op_assembled = data; 1233 } 1234 *data = op->op_assembled; 1235 return CEED_ERROR_SUCCESS; 1236 } 1237 1238 /** 1239 @brief Create object holding CeedOperator assembly data. 1240 1241 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1242 An array with references to the corresponding active CeedElemRestrictions is also stored. 1243 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1244 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1245 CeedEvalMode. 1246 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1247 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1248 1249 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1250 @param[in] op CeedOperator to be assembled 1251 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1252 1253 @return An error code: 0 - success, otherwise - failure 1254 1255 @ref Backend 1256 **/ 1257 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1258 CeedInt num_active_bases_in = 0, num_active_bases_out = 0, offset = 0; 1259 CeedInt num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL; 1260 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1261 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1262 CeedQFunctionField *qf_fields; 1263 CeedQFunction qf; 1264 CeedOperatorField *op_fields; 1265 bool is_composite; 1266 1267 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1268 CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators."); 1269 1270 // Allocate 1271 CeedCall(CeedCalloc(1, data)); 1272 (*data)->ceed = ceed; 1273 CeedCall(CeedReference(ceed)); 1274 1275 // Build OperatorAssembly data 1276 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1277 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1278 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1279 1280 // Determine active input basis 1281 for (CeedInt i = 0; i < num_input_fields; i++) { 1282 CeedVector vec; 1283 1284 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1285 if (vec == CEED_VECTOR_ACTIVE) { 1286 CeedInt index = -1, num_comp, q_comp; 1287 CeedEvalMode eval_mode; 1288 CeedBasis basis_in = NULL; 1289 1290 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1291 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1292 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1293 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1294 for (CeedInt i = 0; i < num_active_bases_in; i++) { 1295 if ((*data)->active_bases_in[i] == basis_in) index = i; 1296 } 1297 if (index == -1) { 1298 CeedElemRestriction elem_rstr_in; 1299 1300 index = num_active_bases_in; 1301 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in)); 1302 (*data)->active_bases_in[num_active_bases_in] = NULL; 1303 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in])); 1304 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in)); 1305 (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL; 1306 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1307 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in])); 1308 CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in)); 1309 num_eval_modes_in[index] = 0; 1310 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in)); 1311 eval_modes_in[index] = NULL; 1312 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in)); 1313 eval_mode_offsets_in[index] = NULL; 1314 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in)); 1315 (*data)->assembled_bases_in[index] = NULL; 1316 num_active_bases_in++; 1317 } 1318 if (eval_mode != CEED_EVAL_WEIGHT) { 1319 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1320 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1321 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1322 for (CeedInt d = 0; d < q_comp; d++) { 1323 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1324 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1325 offset += num_comp; 1326 } 1327 num_eval_modes_in[index] += q_comp; 1328 } 1329 } 1330 } 1331 1332 // Determine active output basis 1333 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1334 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1335 offset = 0; 1336 for (CeedInt i = 0; i < num_output_fields; i++) { 1337 CeedVector vec; 1338 1339 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1340 if (vec == CEED_VECTOR_ACTIVE) { 1341 CeedInt index = -1, num_comp, q_comp; 1342 CeedEvalMode eval_mode; 1343 CeedBasis basis_out = NULL; 1344 1345 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1346 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1347 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1348 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1349 for (CeedInt i = 0; i < num_active_bases_out; i++) { 1350 if ((*data)->active_bases_out[i] == basis_out) index = i; 1351 } 1352 if (index == -1) { 1353 CeedElemRestriction elem_rstr_out; 1354 1355 index = num_active_bases_out; 1356 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out)); 1357 (*data)->active_bases_out[num_active_bases_out] = NULL; 1358 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out])); 1359 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out)); 1360 (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL; 1361 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1362 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out])); 1363 CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out)); 1364 num_eval_modes_out[index] = 0; 1365 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out)); 1366 eval_modes_out[index] = NULL; 1367 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out)); 1368 eval_mode_offsets_out[index] = NULL; 1369 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out)); 1370 (*data)->assembled_bases_out[index] = NULL; 1371 num_active_bases_out++; 1372 } 1373 if (eval_mode != CEED_EVAL_WEIGHT) { 1374 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1375 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1376 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1377 for (CeedInt d = 0; d < q_comp; d++) { 1378 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1379 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1380 offset += num_comp; 1381 } 1382 num_eval_modes_out[index] += q_comp; 1383 } 1384 } 1385 } 1386 (*data)->num_active_bases_in = num_active_bases_in; 1387 (*data)->num_eval_modes_in = num_eval_modes_in; 1388 (*data)->eval_modes_in = eval_modes_in; 1389 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1390 (*data)->num_active_bases_out = num_active_bases_out; 1391 (*data)->num_eval_modes_out = num_eval_modes_out; 1392 (*data)->eval_modes_out = eval_modes_out; 1393 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1394 (*data)->num_output_components = offset; 1395 return CEED_ERROR_SUCCESS; 1396 } 1397 1398 /** 1399 @brief Get CeedOperator CeedEvalModes for assembly. 1400 1401 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1402 1403 @param[in] data CeedOperatorAssemblyData 1404 @param[out] num_active_bases_in Total number of active bases for input 1405 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1406 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1407 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1408 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1409 @param[out] num_active_bases_out Total number of active bases for output 1410 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1411 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1412 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1413 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1414 including contributions of all active bases 1415 1416 @return An error code: 0 - success, otherwise - failure 1417 1418 @ref Backend 1419 **/ 1420 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in, 1421 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out, 1422 CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, 1423 CeedSize *num_output_components) { 1424 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1425 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1426 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1427 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1428 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1429 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1430 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1431 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1432 if (num_output_components) *num_output_components = data->num_output_components; 1433 return CEED_ERROR_SUCCESS; 1434 } 1435 1436 /** 1437 @brief Get CeedOperator CeedBasis data for assembly. 1438 1439 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1440 1441 @param[in] data CeedOperatorAssemblyData 1442 @param[out] num_active_bases_in Number of active input bases, or NULL 1443 @param[out] active_bases_in Pointer to hold active input CeedBasis, or NULL 1444 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1445 @param[out] num_active_bases_out Number of active output bases, or NULL 1446 @param[out] active_bases_out Pointer to hold active output CeedBasis, or NULL 1447 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1448 1449 @return An error code: 0 - success, otherwise - failure 1450 1451 @ref Backend 1452 **/ 1453 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in, 1454 const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out, 1455 const CeedScalar ***assembled_bases_out) { 1456 // Assemble B_in, B_out if needed 1457 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1458 CeedInt num_qpts; 1459 1460 if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts)); 1461 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts)); 1462 for (CeedInt b = 0; b < data->num_active_bases_in; b++) { 1463 bool has_eval_none = false; 1464 CeedInt num_nodes; 1465 CeedScalar *B_in = NULL, *identity = NULL; 1466 1467 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes)); 1468 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1469 1470 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1471 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1472 } 1473 if (has_eval_none) { 1474 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1475 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1476 identity[i * num_nodes + i] = 1.0; 1477 } 1478 } 1479 1480 for (CeedInt q = 0; q < num_qpts; q++) { 1481 for (CeedInt n = 0; n < num_nodes; n++) { 1482 CeedInt d_in = 0, q_comp_in; 1483 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1484 1485 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1486 const CeedInt qq = data->num_eval_modes_in[b] * q; 1487 const CeedScalar *B = NULL; 1488 1489 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B)); 1490 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1491 if (q_comp_in > 1) { 1492 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1493 else B = &B[(++d_in) * num_qpts * num_nodes]; 1494 } 1495 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1496 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1497 } 1498 } 1499 } 1500 if (identity) CeedCall(CeedFree(&identity)); 1501 data->assembled_bases_in[b] = B_in; 1502 } 1503 } 1504 1505 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1506 CeedInt num_qpts; 1507 1508 if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts)); 1509 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts)); 1510 for (CeedInt b = 0; b < data->num_active_bases_out; b++) { 1511 bool has_eval_none = false; 1512 CeedInt num_nodes; 1513 CeedScalar *B_out = NULL, *identity = NULL; 1514 1515 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes)); 1516 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1517 1518 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1519 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1520 } 1521 if (has_eval_none) { 1522 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1523 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1524 identity[i * num_nodes + i] = 1.0; 1525 } 1526 } 1527 1528 for (CeedInt q = 0; q < num_qpts; q++) { 1529 for (CeedInt n = 0; n < num_nodes; n++) { 1530 CeedInt d_out = 0, q_comp_out; 1531 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1532 1533 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1534 const CeedInt qq = data->num_eval_modes_out[b] * q; 1535 const CeedScalar *B = NULL; 1536 1537 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B)); 1538 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1539 if (q_comp_out > 1) { 1540 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1541 else B = &B[(++d_out) * num_qpts * num_nodes]; 1542 } 1543 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1544 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1545 } 1546 } 1547 } 1548 if (identity) CeedCall(CeedFree(&identity)); 1549 data->assembled_bases_out[b] = B_out; 1550 } 1551 } 1552 1553 // Pass out assembled data 1554 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1555 if (active_bases_in) *active_bases_in = data->active_bases_in; 1556 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1557 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1558 if (active_bases_out) *active_bases_out = data->active_bases_out; 1559 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1560 return CEED_ERROR_SUCCESS; 1561 } 1562 1563 /** 1564 @brief Get CeedOperator CeedBasis data for assembly. 1565 1566 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1567 1568 @param[in] data CeedOperatorAssemblyData 1569 @param[out] num_active_elem_rstrs_in Number of active input element restrictions, or NULL 1570 @param[out] active_elem_rstrs_in Pointer to hold active input CeedElemRestrictions, or NULL 1571 @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL 1572 @param[out] active_elem_rstrs_out Pointer to hold active output CeedElemRestrictions, or NULL 1573 1574 @return An error code: 0 - success, otherwise - failure 1575 1576 @ref Backend 1577 **/ 1578 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in, 1579 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out, 1580 CeedElemRestriction **active_elem_rstrs_out) { 1581 if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in; 1582 if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in; 1583 if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out; 1584 if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out; 1585 return CEED_ERROR_SUCCESS; 1586 } 1587 1588 /** 1589 @brief Destroy CeedOperatorAssemblyData 1590 1591 @param[in,out] data CeedOperatorAssemblyData to destroy 1592 1593 @return An error code: 0 - success, otherwise - failure 1594 1595 @ref Backend 1596 **/ 1597 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1598 if (!*data) { 1599 *data = NULL; 1600 return CEED_ERROR_SUCCESS; 1601 } 1602 CeedCall(CeedDestroy(&(*data)->ceed)); 1603 for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) { 1604 CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b])); 1605 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b])); 1606 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1607 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1608 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1609 } 1610 for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) { 1611 CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b])); 1612 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b])); 1613 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1614 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1615 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1616 } 1617 CeedCall(CeedFree(&(*data)->active_bases_in)); 1618 CeedCall(CeedFree(&(*data)->active_bases_out)); 1619 CeedCall(CeedFree(&(*data)->active_elem_rstrs_in)); 1620 CeedCall(CeedFree(&(*data)->active_elem_rstrs_out)); 1621 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1622 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1623 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1624 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1625 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1626 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1627 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1628 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1629 1630 CeedCall(CeedFree(data)); 1631 return CEED_ERROR_SUCCESS; 1632 } 1633 1634 /** 1635 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 1636 1637 @param[in] op CeedOperator to retrieve fallback for 1638 @param[out] op_fallback Fallback CeedOperator 1639 1640 @return An error code: 0 - success, otherwise - failure 1641 1642 @ref Backend 1643 **/ 1644 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 1645 // Create if needed 1646 if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op)); 1647 if (op->op_fallback) { 1648 bool is_debug; 1649 1650 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 1651 if (is_debug) { 1652 Ceed ceed, ceed_fallback; 1653 const char *resource, *resource_fallback; 1654 1655 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1656 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 1657 CeedCall(CeedGetResource(ceed, &resource)); 1658 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 1659 1660 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 1661 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 1662 op->op_fallback); 1663 } 1664 } 1665 *op_fallback = op->op_fallback; 1666 return CEED_ERROR_SUCCESS; 1667 } 1668 1669 /** 1670 @brief Get the parent CeedOperator for a fallback CeedOperator 1671 1672 @param[in] op CeedOperator context 1673 @param[out] parent Variable to store parent CeedOperator context 1674 1675 @return An error code: 0 - success, otherwise - failure 1676 1677 @ref Backend 1678 **/ 1679 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) { 1680 *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL; 1681 return CEED_ERROR_SUCCESS; 1682 } 1683 1684 /** 1685 @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator 1686 1687 @param[in] op CeedOperator context 1688 @param[out] parent Variable to store parent Ceed context 1689 1690 @return An error code: 0 - success, otherwise - failure 1691 1692 @ref Backend 1693 **/ 1694 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) { 1695 *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed; 1696 return CEED_ERROR_SUCCESS; 1697 } 1698 1699 /// @} 1700 1701 /// ---------------------------------------------------------------------------- 1702 /// CeedOperator Public API 1703 /// ---------------------------------------------------------------------------- 1704 /// @addtogroup CeedOperatorUser 1705 /// @{ 1706 1707 /** 1708 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1709 1710 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1711 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1712 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1713 1714 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1715 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1716 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1717 1718 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1719 1720 @param[in] op CeedOperator to assemble CeedQFunction 1721 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1722 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1723 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1724 1725 @return An error code: 0 - success, otherwise - failure 1726 1727 @ref User 1728 **/ 1729 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1730 CeedCall(CeedOperatorCheckReady(op)); 1731 1732 if (op->LinearAssembleQFunction) { 1733 // Backend version 1734 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1735 } else { 1736 // Operator fallback 1737 CeedOperator op_fallback; 1738 1739 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1740 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1741 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1742 } 1743 return CEED_ERROR_SUCCESS; 1744 } 1745 1746 /** 1747 @brief Assemble CeedQFunction and store result internally. 1748 1749 Return copied references of stored data to the caller. 1750 Caller is responsible for ownership and destruction of the copied references. 1751 See also @ref CeedOperatorLinearAssembleQFunction 1752 1753 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1754 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1755 1756 @param[in] op CeedOperator to assemble CeedQFunction 1757 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1758 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1759 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1760 1761 @return An error code: 0 - success, otherwise - failure 1762 1763 @ref User 1764 **/ 1765 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1766 int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL; 1767 CeedOperator op_assemble = NULL; 1768 CeedOperator op_fallback_parent = NULL; 1769 1770 CeedCall(CeedOperatorCheckReady(op)); 1771 1772 // Determine if fallback parent or operator has implementation 1773 CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent)); 1774 if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) { 1775 // -- Backend version for op fallback parent is faster, if it exists 1776 LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate; 1777 op_assemble = op_fallback_parent; 1778 } else if (op->LinearAssembleQFunctionUpdate) { 1779 // -- Backend version for op 1780 LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate; 1781 op_assemble = op; 1782 } 1783 1784 // Assemble QFunction 1785 if (LinearAssembleQFunctionUpdate) { 1786 // Backend or fallback parent version 1787 bool qf_assembled_is_setup; 1788 CeedVector assembled_vec = NULL; 1789 CeedElemRestriction assembled_rstr = NULL; 1790 1791 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1792 if (qf_assembled_is_setup) { 1793 bool update_needed; 1794 1795 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1796 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1797 if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request)); 1798 } else { 1799 CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request)); 1800 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1801 } 1802 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1803 1804 // Copy reference from internally held copy 1805 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1806 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1807 CeedCall(CeedVectorDestroy(&assembled_vec)); 1808 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1809 } else { 1810 // Operator fallback 1811 CeedOperator op_fallback; 1812 1813 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1814 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1815 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1816 } 1817 return CEED_ERROR_SUCCESS; 1818 } 1819 1820 /** 1821 @brief Assemble the diagonal of a square linear CeedOperator 1822 1823 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1824 1825 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1826 1827 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1828 1829 @param[in] op CeedOperator to assemble CeedQFunction 1830 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1831 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1832 1833 @return An error code: 0 - success, otherwise - failure 1834 1835 @ref User 1836 **/ 1837 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1838 bool is_composite; 1839 CeedSize input_size = 0, output_size = 0; 1840 1841 CeedCall(CeedOperatorCheckReady(op)); 1842 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1843 1844 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1845 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1846 1847 // Early exit for empty operator 1848 if (!is_composite) { 1849 CeedInt num_elem = 0; 1850 1851 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1852 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1853 } 1854 1855 if (op->LinearAssembleDiagonal) { 1856 // Backend version 1857 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1858 return CEED_ERROR_SUCCESS; 1859 } else if (op->LinearAssembleAddDiagonal) { 1860 // Backend version with zeroing first 1861 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1862 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1863 return CEED_ERROR_SUCCESS; 1864 } else { 1865 // Operator fallback 1866 CeedOperator op_fallback; 1867 1868 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1869 if (op_fallback) { 1870 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1871 return CEED_ERROR_SUCCESS; 1872 } 1873 } 1874 // Default interface implementation 1875 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1876 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1877 return CEED_ERROR_SUCCESS; 1878 } 1879 1880 /** 1881 @brief Assemble the diagonal of a square linear CeedOperator 1882 1883 This sums into a CeedVector the diagonal of a linear CeedOperator. 1884 1885 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1886 1887 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1888 1889 @param[in] op CeedOperator to assemble CeedQFunction 1890 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1891 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1892 1893 @return An error code: 0 - success, otherwise - failure 1894 1895 @ref User 1896 **/ 1897 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1898 bool is_composite; 1899 CeedSize input_size = 0, output_size = 0; 1900 1901 CeedCall(CeedOperatorCheckReady(op)); 1902 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1903 1904 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1905 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1906 1907 // Early exit for empty operator 1908 if (!is_composite) { 1909 CeedInt num_elem = 0; 1910 1911 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1912 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1913 } 1914 1915 if (op->LinearAssembleAddDiagonal) { 1916 // Backend version 1917 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1918 return CEED_ERROR_SUCCESS; 1919 } else { 1920 // Operator fallback 1921 CeedOperator op_fallback; 1922 1923 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1924 if (op_fallback) { 1925 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1926 return CEED_ERROR_SUCCESS; 1927 } 1928 } 1929 // Default interface implementation 1930 if (is_composite) { 1931 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1932 } else { 1933 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1934 } 1935 return CEED_ERROR_SUCCESS; 1936 } 1937 1938 /** 1939 @brief Fully assemble the point-block diagonal pattern of a linear operator. 1940 1941 Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal(). 1942 1943 The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the 1944 matrix in entry (i, j). 1945 Note that the (i, j) pairs are unique. 1946 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in 1947 the same ordering. 1948 1949 This will generally be slow unless your operator is low-order. 1950 1951 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1952 1953 @param[in] op CeedOperator to assemble 1954 @param[out] num_entries Number of entries in coordinate nonzero pattern 1955 @param[out] rows Row number for each entry 1956 @param[out] cols Column number for each entry 1957 1958 @ref User 1959 **/ 1960 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1961 Ceed ceed; 1962 bool is_composite; 1963 CeedInt num_active_components, num_sub_operators; 1964 CeedOperator *sub_operators; 1965 1966 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1967 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1968 1969 CeedSize input_size = 0, output_size = 0; 1970 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1971 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1972 1973 if (is_composite) { 1974 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators)); 1975 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1976 } else { 1977 sub_operators = &op; 1978 num_sub_operators = 1; 1979 } 1980 1981 // Verify operator can be assembled correctly 1982 { 1983 CeedOperatorAssemblyData data; 1984 CeedInt num_active_elem_rstrs, comp_stride; 1985 CeedElemRestriction *active_elem_rstrs; 1986 1987 // Get initial values to check against 1988 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data)); 1989 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 1990 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride)); 1991 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components)); 1992 1993 // Verify that all active element restrictions have same component stride and number of components 1994 for (CeedInt k = 0; k < num_sub_operators; k++) { 1995 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data)); 1996 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 1997 for (CeedInt i = 0; i < num_active_elem_rstrs; i++) { 1998 CeedInt comp_stride_sub, num_active_components_sub; 1999 2000 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub)); 2001 CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION, 2002 "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub); 2003 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub)); 2004 CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE, 2005 "All suboperators must have the same number of output components"); 2006 } 2007 } 2008 } 2009 *num_entries = input_size * num_active_components; 2010 CeedCall(CeedCalloc(*num_entries, rows)); 2011 CeedCall(CeedCalloc(*num_entries, cols)); 2012 2013 for (CeedInt o = 0; o < num_sub_operators; o++) { 2014 CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr; 2015 CeedInt comp_stride, num_elem, elem_size; 2016 const CeedInt *offsets, *point_block_offsets; 2017 2018 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr)); 2019 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride)); 2020 CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem)); 2021 CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size)); 2022 CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets)); 2023 2024 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr)); 2025 CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets)); 2026 2027 for (CeedSize i = 0; i < num_elem * elem_size; i++) { 2028 for (CeedInt c_out = 0; c_out < num_active_components; c_out++) { 2029 for (CeedInt c_in = 0; c_in < num_active_components; c_in++) { 2030 (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride; 2031 (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride; 2032 } 2033 } 2034 } 2035 2036 CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets)); 2037 CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets)); 2038 CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr)); 2039 } 2040 return CEED_ERROR_SUCCESS; 2041 } 2042 2043 /** 2044 @brief Assemble the point block diagonal of a square linear CeedOperator 2045 2046 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 2047 2048 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2049 2050 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2051 2052 @param[in] op CeedOperator to assemble CeedQFunction 2053 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2054 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2055 component in]. 2056 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2057 2058 @return An error code: 0 - success, otherwise - failure 2059 2060 @ref User 2061 **/ 2062 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2063 bool is_composite; 2064 CeedSize input_size = 0, output_size = 0; 2065 2066 CeedCall(CeedOperatorCheckReady(op)); 2067 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2068 2069 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2070 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2071 2072 // Early exit for empty operator 2073 if (!is_composite) { 2074 CeedInt num_elem = 0; 2075 2076 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2077 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2078 } 2079 2080 if (op->LinearAssemblePointBlockDiagonal) { 2081 // Backend version 2082 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 2083 return CEED_ERROR_SUCCESS; 2084 } else if (op->LinearAssembleAddPointBlockDiagonal) { 2085 // Backend version with zeroing first 2086 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2087 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2088 return CEED_ERROR_SUCCESS; 2089 } else { 2090 // Operator fallback 2091 CeedOperator op_fallback; 2092 2093 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2094 if (op_fallback) { 2095 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 2096 return CEED_ERROR_SUCCESS; 2097 } 2098 } 2099 // Default interface implementation 2100 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2101 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2102 return CEED_ERROR_SUCCESS; 2103 } 2104 2105 /** 2106 @brief Assemble the point block diagonal of a square linear CeedOperator 2107 2108 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 2109 2110 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2111 2112 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2113 2114 @param[in] op CeedOperator to assemble CeedQFunction 2115 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2116 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2117 component in]. 2118 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2119 2120 @return An error code: 0 - success, otherwise - failure 2121 2122 @ref User 2123 **/ 2124 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2125 bool is_composite; 2126 CeedSize input_size = 0, output_size = 0; 2127 2128 CeedCall(CeedOperatorCheckReady(op)); 2129 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2130 2131 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2132 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2133 2134 // Early exit for empty operator 2135 if (!is_composite) { 2136 CeedInt num_elem = 0; 2137 2138 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2139 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2140 } 2141 2142 if (op->LinearAssembleAddPointBlockDiagonal) { 2143 // Backend version 2144 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2145 return CEED_ERROR_SUCCESS; 2146 } else { 2147 // Operator fallback 2148 CeedOperator op_fallback; 2149 2150 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2151 if (op_fallback) { 2152 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 2153 return CEED_ERROR_SUCCESS; 2154 } 2155 } 2156 // Default interface implementation 2157 if (is_composite) { 2158 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 2159 } else { 2160 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 2161 } 2162 return CEED_ERROR_SUCCESS; 2163 } 2164 2165 /** 2166 @brief Fully assemble the nonzero pattern of a linear operator. 2167 2168 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 2169 2170 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2171 matrix in entry (i, j). 2172 Note that the (i, j) pairs are not unique and may repeat. 2173 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 2174 2175 This will generally be slow unless your operator is low-order. 2176 2177 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2178 2179 @param[in] op CeedOperator to assemble 2180 @param[out] num_entries Number of entries in coordinate nonzero pattern 2181 @param[out] rows Row number for each entry 2182 @param[out] cols Column number for each entry 2183 2184 @ref User 2185 **/ 2186 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 2187 bool is_composite; 2188 CeedInt num_suboperators, offset = 0; 2189 CeedSize single_entries; 2190 CeedOperator *sub_operators; 2191 2192 CeedCall(CeedOperatorCheckReady(op)); 2193 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2194 2195 if (op->LinearAssembleSymbolic) { 2196 // Backend version 2197 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 2198 return CEED_ERROR_SUCCESS; 2199 } else { 2200 // Operator fallback 2201 CeedOperator op_fallback; 2202 2203 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2204 if (op_fallback) { 2205 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 2206 return CEED_ERROR_SUCCESS; 2207 } 2208 } 2209 2210 // Default interface implementation 2211 2212 // Count entries and allocate rows, cols arrays 2213 *num_entries = 0; 2214 if (is_composite) { 2215 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2216 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2217 for (CeedInt k = 0; k < num_suboperators; ++k) { 2218 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2219 *num_entries += single_entries; 2220 } 2221 } else { 2222 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 2223 *num_entries += single_entries; 2224 } 2225 CeedCall(CeedCalloc(*num_entries, rows)); 2226 CeedCall(CeedCalloc(*num_entries, cols)); 2227 2228 // Assemble nonzero locations 2229 if (is_composite) { 2230 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2231 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2232 for (CeedInt k = 0; k < num_suboperators; ++k) { 2233 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 2234 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2235 offset += single_entries; 2236 } 2237 } else { 2238 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 2239 } 2240 return CEED_ERROR_SUCCESS; 2241 } 2242 2243 /** 2244 @brief Fully assemble the nonzero entries of a linear operator. 2245 2246 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 2247 2248 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2249 matrix in entry (i, j). 2250 Note that the (i, j) pairs are not unique and may repeat. 2251 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 2252 2253 This will generally be slow unless your operator is low-order. 2254 2255 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2256 2257 @param[in] op CeedOperator to assemble 2258 @param[out] values Values to assemble into matrix 2259 2260 @ref User 2261 **/ 2262 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 2263 bool is_composite; 2264 CeedInt num_suboperators, offset = 0; 2265 CeedSize single_entries = 0; 2266 CeedOperator *sub_operators; 2267 2268 CeedCall(CeedOperatorCheckReady(op)); 2269 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2270 2271 // Early exit for empty operator 2272 if (!is_composite) { 2273 CeedInt num_elem = 0; 2274 2275 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2276 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2277 } 2278 2279 if (op->LinearAssemble) { 2280 // Backend version 2281 CeedCall(op->LinearAssemble(op, values)); 2282 return CEED_ERROR_SUCCESS; 2283 } else { 2284 // Operator fallback 2285 CeedOperator op_fallback; 2286 2287 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2288 if (op_fallback) { 2289 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2290 return CEED_ERROR_SUCCESS; 2291 } 2292 } 2293 2294 // Default interface implementation 2295 CeedCall(CeedVectorSetValue(values, 0.0)); 2296 if (is_composite) { 2297 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2298 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2299 for (CeedInt k = 0; k < num_suboperators; k++) { 2300 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2301 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2302 offset += single_entries; 2303 } 2304 } else { 2305 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2306 } 2307 return CEED_ERROR_SUCCESS; 2308 } 2309 2310 /** 2311 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2312 2313 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2314 2315 @param[in] op Composite CeedOperator 2316 @param[in] num_skip_indices Number of suboperators to skip 2317 @param[in] skip_indices Array of indices of suboperators to skip 2318 @param[out] mult Vector to store multiplicity (of size l_size) 2319 2320 @return An error code: 0 - success, otherwise - failure 2321 2322 @ref User 2323 **/ 2324 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2325 Ceed ceed; 2326 CeedInt num_suboperators; 2327 CeedSize l_vec_len; 2328 CeedScalar *mult_array; 2329 CeedVector ones_l_vec; 2330 CeedElemRestriction elem_rstr, mult_elem_rstr; 2331 CeedOperator *sub_operators; 2332 2333 CeedCall(CeedOperatorCheckReady(op)); 2334 2335 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2336 2337 // Zero mult vector 2338 CeedCall(CeedVectorSetValue(mult, 0.0)); 2339 2340 // Get suboperators 2341 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2342 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2343 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2344 2345 // Work vector 2346 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2347 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2348 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2349 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2350 2351 // Compute multiplicity across suboperators 2352 for (CeedInt i = 0; i < num_suboperators; i++) { 2353 const CeedScalar *sub_mult_array; 2354 CeedVector sub_mult_l_vec, ones_e_vec; 2355 2356 // -- Check for suboperator to skip 2357 for (CeedInt j = 0; j < num_skip_indices; j++) { 2358 if (skip_indices[j] == i) continue; 2359 } 2360 2361 // -- Sub operator multiplicity 2362 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2363 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); 2364 CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2365 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2366 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2367 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2368 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2369 // ---- Flag every node present in the current suboperator 2370 for (CeedInt j = 0; j < l_vec_len; j++) { 2371 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2372 } 2373 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2374 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2375 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2376 CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr)); 2377 } 2378 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2379 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2380 return CEED_ERROR_SUCCESS; 2381 } 2382 2383 /** 2384 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2385 grid interpolation 2386 2387 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2388 2389 @param[in] op_fine Fine grid operator 2390 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2391 @param[in] rstr_coarse Coarse grid restriction 2392 @param[in] basis_coarse Coarse grid active vector basis 2393 @param[out] op_coarse Coarse grid operator 2394 @param[out] op_prolong Coarse to fine operator, or NULL 2395 @param[out] op_restrict Fine to coarse operator, or NULL 2396 2397 @return An error code: 0 - success, otherwise - failure 2398 2399 @ref User 2400 **/ 2401 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2402 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2403 CeedBasis basis_c_to_f = NULL; 2404 2405 CeedCall(CeedOperatorCheckReady(op_fine)); 2406 2407 // Build prolongation matrix, if required 2408 if (op_prolong || op_restrict) { 2409 CeedBasis basis_fine; 2410 2411 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2412 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2413 } 2414 2415 // Core code 2416 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2417 return CEED_ERROR_SUCCESS; 2418 } 2419 2420 /** 2421 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2422 2423 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2424 2425 @param[in] op_fine Fine grid operator 2426 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2427 @param[in] rstr_coarse Coarse grid restriction 2428 @param[in] basis_coarse Coarse grid active vector basis 2429 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2430 @param[out] op_coarse Coarse grid operator 2431 @param[out] op_prolong Coarse to fine operator, or NULL 2432 @param[out] op_restrict Fine to coarse operator, or NULL 2433 2434 @return An error code: 0 - success, otherwise - failure 2435 2436 @ref User 2437 **/ 2438 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2439 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2440 CeedOperator *op_restrict) { 2441 Ceed ceed; 2442 CeedInt Q_f, Q_c; 2443 CeedBasis basis_fine, basis_c_to_f = NULL; 2444 2445 CeedCall(CeedOperatorCheckReady(op_fine)); 2446 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2447 2448 // Check for compatible quadrature spaces 2449 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2450 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2451 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2452 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2453 2454 // Create coarse to fine basis, if required 2455 if (op_prolong || op_restrict) { 2456 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2457 CeedScalar *q_ref, *q_weight, *grad; 2458 2459 // Check if interpolation matrix is provided 2460 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2461 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2462 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2463 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2464 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2465 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2466 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2467 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2468 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2469 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2470 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2471 CeedCall(CeedFree(&q_ref)); 2472 CeedCall(CeedFree(&q_weight)); 2473 CeedCall(CeedFree(&grad)); 2474 } 2475 2476 // Core code 2477 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2478 return CEED_ERROR_SUCCESS; 2479 } 2480 2481 /** 2482 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2483 2484 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2485 2486 @param[in] op_fine Fine grid operator 2487 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2488 @param[in] rstr_coarse Coarse grid restriction 2489 @param[in] basis_coarse Coarse grid active vector basis 2490 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2491 @param[out] op_coarse Coarse grid operator 2492 @param[out] op_prolong Coarse to fine operator, or NULL 2493 @param[out] op_restrict Fine to coarse operator, or NULL 2494 2495 @return An error code: 0 - success, otherwise - failure 2496 2497 @ref User 2498 **/ 2499 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2500 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2501 CeedOperator *op_restrict) { 2502 Ceed ceed; 2503 CeedInt Q_f, Q_c; 2504 CeedBasis basis_fine, basis_c_to_f = NULL; 2505 2506 CeedCall(CeedOperatorCheckReady(op_fine)); 2507 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2508 2509 // Check for compatible quadrature spaces 2510 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2511 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2512 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2513 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2514 2515 // Coarse to fine basis 2516 if (op_prolong || op_restrict) { 2517 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2518 CeedScalar *q_ref, *q_weight, *grad; 2519 CeedElemTopology topo; 2520 2521 // Check if interpolation matrix is provided 2522 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2523 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2524 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2525 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2526 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2527 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2528 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2529 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2530 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2531 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2532 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2533 CeedCall(CeedFree(&q_ref)); 2534 CeedCall(CeedFree(&q_weight)); 2535 CeedCall(CeedFree(&grad)); 2536 } 2537 2538 // Core code 2539 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2540 return CEED_ERROR_SUCCESS; 2541 } 2542 2543 /** 2544 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2545 2546 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2547 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2548 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2549 \hat S V\f$. 2550 The CeedOperator must be linear and non-composite. 2551 The associated CeedQFunction must therefore also be linear. 2552 2553 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2554 2555 @param[in] op CeedOperator to create element inverses 2556 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2557 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2558 2559 @return An error code: 0 - success, otherwise - failure 2560 2561 @ref User 2562 **/ 2563 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2564 Ceed ceed, ceed_parent; 2565 bool interp = false, grad = false, is_tensor_basis = true; 2566 CeedInt num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2567 CeedSize l_size = 1; 2568 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg; 2569 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2570 CeedVector q_data; 2571 CeedElemRestriction rstr = NULL, rstr_qd_i; 2572 CeedBasis basis = NULL, fdm_basis; 2573 CeedQFunctionContext ctx_fdm; 2574 CeedQFunctionField *qf_fields; 2575 CeedQFunction qf, qf_fdm; 2576 CeedOperatorField *op_fields; 2577 2578 CeedCall(CeedOperatorCheckReady(op)); 2579 2580 if (op->CreateFDMElementInverse) { 2581 // Backend version 2582 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2583 return CEED_ERROR_SUCCESS; 2584 } else { 2585 // Operator fallback 2586 CeedOperator op_fallback; 2587 2588 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2589 if (op_fallback) { 2590 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2591 return CEED_ERROR_SUCCESS; 2592 } 2593 } 2594 2595 // Default interface implementation 2596 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2597 CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent)); 2598 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2599 2600 // Determine active input basis 2601 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2602 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2603 for (CeedInt i = 0; i < num_input_fields; i++) { 2604 CeedVector vec; 2605 2606 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2607 if (vec == CEED_VECTOR_ACTIVE) { 2608 CeedEvalMode eval_mode; 2609 2610 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2611 interp = interp || eval_mode == CEED_EVAL_INTERP; 2612 grad = grad || eval_mode == CEED_EVAL_GRAD; 2613 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2614 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2615 } 2616 } 2617 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2618 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2619 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2620 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2621 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2622 CeedCall(CeedBasisGetDimension(basis, &dim)); 2623 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2624 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2625 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2626 2627 // Build and diagonalize 1D Mass and Laplacian 2628 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2629 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2630 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2631 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2632 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2633 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2634 CeedCall(CeedCalloc(P_1d, &lambda)); 2635 // -- Build matrices 2636 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2637 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2638 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2639 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2640 2641 // -- Diagonalize 2642 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2643 CeedCall(CeedFree(&mass)); 2644 CeedCall(CeedFree(&laplace)); 2645 for (CeedInt i = 0; i < P_1d; i++) { 2646 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2647 } 2648 CeedCall(CeedFree(&x)); 2649 2650 { 2651 CeedInt layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2652 CeedScalar max_norm = 0; 2653 const CeedScalar *assembled_array, *q_weight_array; 2654 CeedVector assembled = NULL, q_weight; 2655 CeedElemRestriction rstr_qf = NULL; 2656 2657 // Assemble QFunction 2658 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2659 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2660 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2661 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2662 2663 // Calculate element averages 2664 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2665 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2666 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2667 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2668 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2669 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2670 2671 for (CeedInt e = 0; e < num_elem; e++) { 2672 CeedInt count = 0; 2673 2674 for (CeedInt q = 0; q < num_qpts; q++) { 2675 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2676 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2677 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2678 count++; 2679 } 2680 } 2681 } 2682 if (count) { 2683 elem_avg[e] /= count; 2684 } else { 2685 elem_avg[e] = 1.0; 2686 } 2687 } 2688 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2689 CeedCall(CeedVectorDestroy(&assembled)); 2690 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2691 CeedCall(CeedVectorDestroy(&q_weight)); 2692 } 2693 2694 // Build FDM diagonal 2695 { 2696 CeedScalar *q_data_array, *fdm_diagonal; 2697 2698 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2699 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2700 for (CeedInt c = 0; c < num_comp; c++) { 2701 for (CeedInt n = 0; n < num_nodes; n++) { 2702 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2703 if (grad) { 2704 for (CeedInt d = 0; d < dim; d++) { 2705 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2706 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2707 } 2708 } 2709 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2710 } 2711 } 2712 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2713 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2714 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2715 for (CeedInt e = 0; e < num_elem; e++) { 2716 for (CeedInt c = 0; c < num_comp; c++) { 2717 for (CeedInt n = 0; n < num_nodes; n++) 2718 q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2719 } 2720 } 2721 CeedCall(CeedFree(&elem_avg)); 2722 CeedCall(CeedFree(&fdm_diagonal)); 2723 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2724 } 2725 2726 // Setup FDM operator 2727 // -- Basis 2728 { 2729 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2730 2731 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2732 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2733 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2734 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2735 CeedCall(CeedFree(&fdm_interp)); 2736 CeedCall(CeedFree(&grad_dummy)); 2737 CeedCall(CeedFree(&q_ref_dummy)); 2738 CeedCall(CeedFree(&q_weight_dummy)); 2739 CeedCall(CeedFree(&lambda)); 2740 } 2741 2742 // -- Restriction 2743 { 2744 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2745 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2746 } 2747 2748 // -- QFunction 2749 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2750 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2751 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2752 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2753 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2754 2755 // -- QFunction context 2756 { 2757 CeedInt *num_comp_data; 2758 2759 CeedCall(CeedCalloc(1, &num_comp_data)); 2760 num_comp_data[0] = num_comp; 2761 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2762 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2763 } 2764 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2765 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2766 2767 // -- Operator 2768 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2769 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2770 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data)); 2771 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2772 2773 // Cleanup 2774 CeedCall(CeedVectorDestroy(&q_data)); 2775 CeedCall(CeedBasisDestroy(&fdm_basis)); 2776 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2777 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2778 return CEED_ERROR_SUCCESS; 2779 } 2780 2781 /// @} 2782