1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed.h> 10 #include <ceed/backend.h> 11 #include <assert.h> 12 #include <math.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 17 /// @file 18 /// Implementation of CeedOperator preconditioning interfaces 19 20 /// ---------------------------------------------------------------------------- 21 /// CeedOperator Library Internal Preconditioning Functions 22 /// ---------------------------------------------------------------------------- 23 /// @addtogroup CeedOperatorDeveloper 24 /// @{ 25 26 /** 27 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) { 38 char *source_path_with_name = NULL; 39 40 // Check if NULL qf passed in 41 if (!qf) return CEED_ERROR_SUCCESS; 42 43 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 44 CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); 45 46 if (qf->source_path) { 47 size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); 48 CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); 49 memcpy(source_path_with_name, qf->source_path, path_len); 50 memcpy(&source_path_with_name[path_len], ":", 1); 51 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 52 } else { 53 CeedCall(CeedCalloc(1, &source_path_with_name)); 54 } 55 56 CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback)); 57 { 58 CeedQFunctionContext ctx; 59 60 CeedCall(CeedQFunctionGetContext(qf, &ctx)); 61 CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx)); 62 } 63 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 64 CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode)); 65 } 66 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 67 CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode)); 68 } 69 CeedCall(CeedFree(&source_path_with_name)); 70 return CEED_ERROR_SUCCESS; 71 } 72 73 /** 74 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality 75 76 @param[in,out] op CeedOperator to create fallback for 77 78 @return An error code: 0 - success, otherwise - failure 79 80 @ref Developer 81 **/ 82 static int CeedOperatorCreateFallback(CeedOperator op) { 83 Ceed ceed_fallback; 84 bool is_composite; 85 CeedOperator op_fallback; 86 87 // Check not already created 88 if (op->op_fallback) return CEED_ERROR_SUCCESS; 89 90 // Fallback Ceed 91 CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback)); 92 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 93 94 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 95 CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); 96 97 // Clone Op 98 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 99 if (is_composite) { 100 CeedInt num_suboperators; 101 CeedOperator *sub_operators; 102 103 CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback)); 104 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 105 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 106 for (CeedInt i = 0; i < num_suboperators; i++) { 107 CeedOperator op_sub_fallback; 108 109 CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback)); 110 CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback)); 111 } 112 } else { 113 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 114 115 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback)); 116 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback)); 117 CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback)); 118 CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback)); 119 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 120 CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis, 121 op->input_fields[i]->vec)); 122 } 123 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 124 CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis, 125 op->output_fields[i]->vec)); 126 } 127 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled)); 128 // Cleanup 129 CeedCall(CeedQFunctionDestroy(&qf_fallback)); 130 CeedCall(CeedQFunctionDestroy(&dqf_fallback)); 131 CeedCall(CeedQFunctionDestroy(&dqfT_fallback)); 132 } 133 CeedCall(CeedOperatorSetName(op_fallback, op->name)); 134 CeedCall(CeedOperatorCheckReady(op_fallback)); 135 // Note: No ref-counting here so we don't get caught in a reference loop. 136 // The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback. 137 op->op_fallback = op_fallback; 138 op_fallback->op_fallback_parent = op; 139 return CEED_ERROR_SUCCESS; 140 } 141 142 /** 143 @brief Select correct basis matrix pointer based on CeedEvalMode 144 145 @param[in] basis CeedBasis from which to get the basis matrix 146 @param[in] eval_mode Current basis evaluation mode 147 @param[in] identity Pointer to identity matrix 148 @param[out] basis_ptr Basis pointer to set 149 150 @ref Developer 151 **/ 152 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) { 153 switch (eval_mode) { 154 case CEED_EVAL_NONE: 155 *basis_ptr = identity; 156 break; 157 case CEED_EVAL_INTERP: 158 CeedCall(CeedBasisGetInterp(basis, basis_ptr)); 159 break; 160 case CEED_EVAL_GRAD: 161 CeedCall(CeedBasisGetGrad(basis, basis_ptr)); 162 break; 163 case CEED_EVAL_DIV: 164 CeedCall(CeedBasisGetDiv(basis, basis_ptr)); 165 break; 166 case CEED_EVAL_CURL: 167 CeedCall(CeedBasisGetCurl(basis, basis_ptr)); 168 break; 169 case CEED_EVAL_WEIGHT: 170 break; // Caught by QF Assembly 171 } 172 assert(*basis_ptr != NULL); 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 181 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 182 @param[out] assembled CeedVector to store assembled diagonal 183 184 @return An error code: 0 - success, otherwise - failure 185 186 @ref Developer 187 **/ 188 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) { 189 Ceed ceed; 190 bool is_composite; 191 192 CeedCall(CeedOperatorGetCeed(op, &ceed)); 193 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 194 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 195 196 // Assemble QFunction 197 CeedInt layout_qf[3]; 198 const CeedScalar *assembled_qf_array; 199 CeedVector assembled_qf = NULL; 200 CeedElemRestriction assembled_elem_rstr = NULL; 201 202 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); 203 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 204 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 205 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 206 207 // Get assembly data 208 const CeedEvalMode **eval_modes_in, **eval_modes_out; 209 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 210 CeedSize **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components; 211 CeedBasis *active_bases_in, *active_bases_out; 212 CeedElemRestriction *active_elem_rstrs_in, *active_elem_rstrs_out; 213 CeedOperatorAssemblyData data; 214 215 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 216 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in, 217 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, 218 &num_output_components)); 219 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL)); 220 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out)); 221 222 // Loop over all active bases (find matching input/output pairs) 223 for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) { 224 CeedInt b_in, b_out, num_elem, num_nodes, num_qpts, num_comp; 225 bool has_eval_none = false; 226 CeedScalar *elem_diag_array, *identity = NULL; 227 CeedVector elem_diag; 228 CeedElemRestriction diag_elem_rstr; 229 230 if (num_active_bases_in <= num_active_bases_out) { 231 b_in = b; 232 for (b_out = 0; b_out < num_active_bases_out; b_out++) { 233 if (active_bases_in[b_in] == active_bases_out[b_out]) { 234 break; 235 } 236 } 237 if (b_out == num_active_bases_out) { 238 continue; 239 } // No matching output basis found 240 } else { 241 b_out = b; 242 for (b_in = 0; b_in < num_active_bases_in; b_in++) { 243 if (active_bases_in[b_in] == active_bases_out[b_out]) { 244 break; 245 } 246 } 247 if (b_in == num_active_bases_in) { 248 continue; 249 } // No matching output basis found 250 } 251 CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED, 252 "Cannot assemble operator diagonal with different input and output active element restrictions"); 253 254 // Assemble point block diagonal restriction, if needed 255 if (is_point_block) { 256 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 257 } else { 258 CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr)); 259 } 260 261 // Create diagonal vector 262 CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag)); 263 264 // Assemble element operator diagonals 265 CeedCall(CeedVectorSetValue(elem_diag, 0.0)); 266 CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array)); 267 CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem)); 268 CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes)); 269 CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp)); 270 if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes; 271 else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts)); 272 273 // Construct identity matrix for basis if required 274 for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) { 275 has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE); 276 } 277 for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) { 278 has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE); 279 } 280 if (has_eval_none) { 281 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 282 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0; 283 } 284 285 // Compute the diagonal of B^T D B 286 // Each element 287 for (CeedSize e = 0; e < num_elem; e++) { 288 // Each basis eval mode pair 289 CeedInt d_out = 0, q_comp_out; 290 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 291 292 for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) { 293 CeedInt d_in = 0, q_comp_in; 294 const CeedScalar *B_t = NULL; 295 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 296 297 CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t)); 298 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out)); 299 if (q_comp_out > 1) { 300 if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0; 301 else B_t = &B_t[(++d_out) * num_qpts * num_nodes]; 302 } 303 eval_mode_out_prev = eval_modes_out[b_out][e_out]; 304 305 for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) { 306 const CeedScalar *B = NULL; 307 308 CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B)); 309 CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in)); 310 if (q_comp_in > 1) { 311 if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0; 312 else B = &B[(++d_in) * num_qpts * num_nodes]; 313 } 314 eval_mode_in_prev = eval_modes_in[b_in][e_in]; 315 316 // Each component 317 for (CeedInt c_out = 0; c_out < num_comp; c_out++) { 318 // Each qpt/node pair 319 for (CeedInt q = 0; q < num_qpts; q++) { 320 if (is_point_block) { 321 // Point Block Diagonal 322 for (CeedInt c_in = 0; c_in < num_comp; c_in++) { 323 const CeedSize c_offset = 324 (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 325 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 326 327 for (CeedInt n = 0; n < num_nodes; n++) { 328 elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] += 329 B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 330 } 331 } 332 } else { 333 // Diagonal Only 334 const CeedInt c_offset = 335 (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out; 336 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]]; 337 338 for (CeedInt n = 0; n < num_nodes; n++) { 339 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n]; 340 } 341 } 342 } 343 } 344 } 345 } 346 } 347 CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array)); 348 349 // Assemble local operator diagonal 350 CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request)); 351 352 // Cleanup 353 CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr)); 354 CeedCall(CeedVectorDestroy(&elem_diag)); 355 CeedCall(CeedFree(&identity)); 356 } 357 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 358 CeedCall(CeedVectorDestroy(&assembled_qf)); 359 return CEED_ERROR_SUCCESS; 360 } 361 362 /** 363 @brief Core logic for assembling composite operator diagonal 364 365 @param[in] op CeedOperator to assemble point block diagonal 366 @param[in] request Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE 367 @param[in] is_point_block Boolean flag to assemble diagonal or point block diagonal 368 @param[out] assembled CeedVector to store assembled diagonal 369 370 @return An error code: 0 - success, otherwise - failure 371 372 @ref Developer 373 **/ 374 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block, 375 CeedVector assembled) { 376 CeedInt num_sub; 377 CeedOperator *suboperators; 378 379 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub)); 380 CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators)); 381 for (CeedInt i = 0; i < num_sub; i++) { 382 if (is_point_block) { 383 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request)); 384 } else { 385 CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request)); 386 } 387 } 388 return CEED_ERROR_SUCCESS; 389 } 390 391 /** 392 @brief Build nonzero pattern for non-composite operator 393 394 Users should generally use CeedOperatorLinearAssembleSymbolic() 395 396 @param[in] op CeedOperator to assemble nonzero pattern 397 @param[in] offset Offset for number of entries 398 @param[out] rows Row number for each entry 399 @param[out] cols Column number for each entry 400 401 @return An error code: 0 - success, otherwise - failure 402 403 @ref Developer 404 **/ 405 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) { 406 Ceed ceed; 407 bool is_composite; 408 CeedSize num_nodes_in, num_nodes_out, count = 0; 409 CeedInt num_elem_in, elem_size_in, num_comp_in, layout_er_in[3]; 410 CeedInt num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries; 411 CeedScalar *array; 412 const CeedScalar *elem_dof_a_in, *elem_dof_a_out; 413 CeedVector index_vec_in, index_vec_out, elem_dof_in, elem_dof_out; 414 CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out; 415 416 CeedCall(CeedOperatorGetCeed(op, &ceed)); 417 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 418 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 419 420 CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out)); 421 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 422 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 423 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 424 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 425 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in)); 426 427 // Determine elem_dof relation for input 428 CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in)); 429 CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array)); 430 for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i; 431 CeedCall(CeedVectorRestoreArray(index_vec_in, &array)); 432 CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in)); 433 CeedCall(CeedVectorSetValue(elem_dof_in, 0.0)); 434 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in)); 435 CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE)); 436 CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in)); 437 CeedCall(CeedVectorDestroy(&index_vec_in)); 438 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in)); 439 440 if (elem_rstr_in != elem_rstr_out) { 441 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 442 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 443 "Active input and output operator restrictions must have the same number of elements"); 444 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 445 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 446 CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out)); 447 448 // Determine elem_dof relation for output 449 CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out)); 450 CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array)); 451 for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i; 452 CeedCall(CeedVectorRestoreArray(index_vec_out, &array)); 453 CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out)); 454 CeedCall(CeedVectorSetValue(elem_dof_out, 0.0)); 455 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out)); 456 CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE)); 457 CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out)); 458 CeedCall(CeedVectorDestroy(&index_vec_out)); 459 CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out)); 460 } else { 461 num_elem_out = num_elem_in; 462 elem_size_out = elem_size_in; 463 num_comp_out = num_comp_in; 464 layout_er_out[0] = layout_er_in[0]; 465 layout_er_out[1] = layout_er_in[1]; 466 layout_er_out[2] = layout_er_in[2]; 467 elem_dof_a_out = elem_dof_a_in; 468 } 469 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 470 471 // Determine i, j locations for element matrices 472 for (CeedInt e = 0; e < num_elem_in; e++) { 473 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 474 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 475 for (CeedInt i = 0; i < elem_size_out; i++) { 476 for (CeedInt j = 0; j < elem_size_in; j++) { 477 const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2]; 478 const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2]; 479 const CeedInt row = elem_dof_a_out[elem_dof_index_row]; 480 const CeedInt col = elem_dof_a_in[elem_dof_index_col]; 481 482 rows[offset + count] = row; 483 cols[offset + count] = col; 484 count++; 485 } 486 } 487 } 488 } 489 } 490 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 491 CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in)); 492 CeedCall(CeedVectorDestroy(&elem_dof_in)); 493 if (elem_rstr_in != elem_rstr_out) { 494 CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out)); 495 CeedCall(CeedVectorDestroy(&elem_dof_out)); 496 } 497 return CEED_ERROR_SUCCESS; 498 } 499 500 /** 501 @brief Assemble nonzero entries for non-composite operator 502 503 Users should generally use CeedOperatorLinearAssemble() 504 505 @param[in] op CeedOperator to assemble 506 @param[in] offset Offset for number of entries 507 @param[out] values Values to assemble into matrix 508 509 @return An error code: 0 - success, otherwise - failure 510 511 @ref Developer 512 **/ 513 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) { 514 Ceed ceed; 515 bool is_composite; 516 517 CeedCall(CeedOperatorGetCeed(op, &ceed)); 518 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 519 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 520 521 // Early exit for empty operator 522 { 523 CeedInt num_elem = 0; 524 525 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 526 if (num_elem == 0) return CEED_ERROR_SUCCESS; 527 } 528 529 if (op->LinearAssembleSingle) { 530 // Backend version 531 CeedCall(op->LinearAssembleSingle(op, offset, values)); 532 return CEED_ERROR_SUCCESS; 533 } else { 534 // Operator fallback 535 CeedOperator op_fallback; 536 537 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 538 if (op_fallback) { 539 CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values)); 540 return CEED_ERROR_SUCCESS; 541 } 542 } 543 544 // Assemble QFunction 545 CeedInt layout_qf[3]; 546 const CeedScalar *assembled_qf_array; 547 CeedVector assembled_qf = NULL; 548 CeedElemRestriction assembled_elem_rstr = NULL; 549 550 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE)); 551 CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); 552 CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); 553 CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); 554 555 // Get assembly data 556 CeedInt num_elem_in, elem_size_in, num_comp_in, num_qpts_in; 557 CeedInt num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries; 558 const CeedEvalMode **eval_modes_in, **eval_modes_out; 559 CeedInt num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out; 560 CeedBasis *active_bases_in, *active_bases_out, basis_in, basis_out; 561 const CeedScalar **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out; 562 CeedElemRestriction elem_rstr_in, elem_rstr_out; 563 CeedRestrictionType elem_rstr_type_in, elem_rstr_type_out; 564 const bool *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL; 565 const CeedInt8 *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL; 566 CeedOperatorAssemblyData data; 567 568 CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data)); 569 CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out, 570 &num_eval_modes_out, &eval_modes_out, NULL, NULL)); 571 572 CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED, 573 "Cannot assemble operator with multiple active bases"); 574 CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); 575 576 CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out)); 577 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out)); 578 basis_in = active_bases_in[0]; 579 basis_out = active_bases_out[0]; 580 B_mat_in = B_mats_in[0]; 581 B_mat_out = B_mats_out[0]; 582 583 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); 584 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); 585 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); 586 if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in; 587 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in)); 588 589 CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in)); 590 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 591 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in)); 592 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 593 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in)); 594 } 595 596 if (elem_rstr_in != elem_rstr_out) { 597 CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out)); 598 CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED, 599 "Active input and output operator restrictions must have the same number of elements"); 600 CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); 601 CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); 602 if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out; 603 else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out)); 604 CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED, 605 "Active input and output bases must have the same number of quadrature points"); 606 607 CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out)); 608 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 609 CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out)); 610 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 611 CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out)); 612 } 613 } else { 614 num_elem_out = num_elem_in; 615 elem_size_out = elem_size_in; 616 num_comp_out = num_comp_in; 617 num_qpts_out = num_qpts_in; 618 619 elem_rstr_orients_out = elem_rstr_orients_in; 620 elem_rstr_curl_orients_out = elem_rstr_curl_orients_in; 621 } 622 local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in; 623 624 // Loop over elements and put in data structure 625 // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 626 CeedSize count = 0; 627 CeedTensorContract contract; 628 CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL; 629 630 CeedCall(CeedBasisGetTensorContract(basis_in, &contract)); 631 CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat)); 632 CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat)); 633 if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b)); 634 635 CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals)); 636 for (CeedSize e = 0; e < num_elem_in; e++) { 637 for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) { 638 for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) { 639 // Compute B^T*D 640 for (CeedSize n = 0; n < elem_size_out; n++) { 641 for (CeedSize q = 0; q < num_qpts_in; q++) { 642 for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) { 643 const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in; 644 CeedScalar sum = 0.0; 645 646 for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) { 647 const CeedSize b_out_index = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n; 648 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out; 649 const CeedSize qf_index = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2]; 650 651 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; 652 } 653 BTD_mat[btd_index] = sum; 654 } 655 } 656 } 657 658 // Form element matrix itself (for each block component) 659 CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE, 660 false, B_mat_in, elem_mat)); 661 662 // Transform the element matrix if required 663 if (elem_rstr_orients_out) { 664 const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out]; 665 666 for (CeedInt i = 0; i < elem_size_out; i++) { 667 const double orient = elem_orients[i] ? -1.0 : 1.0; 668 669 for (CeedInt j = 0; j < elem_size_in; j++) { 670 elem_mat[i * elem_size_in + j] *= orient; 671 } 672 } 673 } else if (elem_rstr_curl_orients_out) { 674 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out]; 675 676 // T^T*(B^T*D*B) 677 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 678 for (CeedInt i = 0; i < elem_size_out; i++) { 679 for (CeedInt j = 0; j < elem_size_in; j++) { 680 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] + 681 (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) + 682 (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0); 683 } 684 } 685 } 686 if (elem_rstr_orients_in) { 687 const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in]; 688 689 for (CeedInt i = 0; i < elem_size_out; i++) { 690 for (CeedInt j = 0; j < elem_size_in; j++) { 691 elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0; 692 } 693 } 694 } else if (elem_rstr_curl_orients_in) { 695 const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in]; 696 697 // (B^T*D*B)*T 698 memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar)); 699 for (CeedInt i = 0; i < elem_size_out; i++) { 700 for (CeedInt j = 0; j < elem_size_in; j++) { 701 elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] + 702 (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) + 703 (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0); 704 } 705 } 706 } 707 708 // Put element matrix in coordinate data structure 709 for (CeedInt i = 0; i < elem_size_out; i++) { 710 for (CeedInt j = 0; j < elem_size_in; j++) { 711 vals[offset + count] = elem_mat[i * elem_size_in + j]; 712 count++; 713 } 714 } 715 } 716 } 717 } 718 CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries"); 719 CeedCall(CeedVectorRestoreArray(values, &vals)); 720 721 // Cleanup 722 CeedCall(CeedFree(&BTD_mat)); 723 CeedCall(CeedFree(&elem_mat)); 724 CeedCall(CeedFree(&elem_mat_b)); 725 if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) { 726 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in)); 727 } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) { 728 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in)); 729 } 730 if (elem_rstr_in != elem_rstr_out) { 731 if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) { 732 CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out)); 733 } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) { 734 CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out)); 735 } 736 } 737 CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); 738 CeedCall(CeedVectorDestroy(&assembled_qf)); 739 return CEED_ERROR_SUCCESS; 740 } 741 742 /** 743 @brief Count number of entries for assembled CeedOperator 744 745 @param[in] op CeedOperator to assemble 746 @param[out] num_entries Number of entries in assembled representation 747 748 @return An error code: 0 - success, otherwise - failure 749 750 @ref Utility 751 **/ 752 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) { 753 bool is_composite; 754 CeedInt num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out; 755 CeedElemRestriction rstr_in, rstr_out; 756 757 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 758 CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); 759 760 CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out)); 761 CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in)); 762 CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in)); 763 CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in)); 764 if (rstr_in != rstr_out) { 765 CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out)); 766 CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED, 767 "Active input and output operator restrictions must have the same number of elements"); 768 CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out)); 769 CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out)); 770 } else { 771 num_elem_out = num_elem_in; 772 elem_size_out = elem_size_in; 773 num_comp_out = num_comp_in; 774 } 775 *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in; 776 return CEED_ERROR_SUCCESS; 777 } 778 779 /** 780 @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator 781 782 @param[in] op_fine Fine grid operator 783 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 784 @param[in] rstr_coarse Coarse grid restriction 785 @param[in] basis_coarse Coarse grid active vector basis 786 @param[in] basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 787 @param[out] op_coarse Coarse grid operator 788 @param[out] op_prolong Coarse to fine operator, or NULL 789 @param[out] op_restrict Fine to coarse operator, or NULL 790 791 @return An error code: 0 - success, otherwise - failure 792 793 @ref Developer 794 **/ 795 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 796 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 797 bool is_composite; 798 Ceed ceed; 799 CeedInt num_comp; 800 CeedVector mult_vec = NULL; 801 CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL; 802 803 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 804 805 // Check for composite operator 806 CeedCall(CeedOperatorIsComposite(op_fine, &is_composite)); 807 CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported"); 808 809 // Coarse Grid 810 CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse)); 811 // -- Clone input fields 812 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 813 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 814 rstr_fine = op_fine->input_fields[i]->elem_rstr; 815 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 816 } else { 817 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr, 818 op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec)); 819 } 820 } 821 // -- Clone output fields 822 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 823 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 824 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE)); 825 } else { 826 CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr, 827 op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec)); 828 } 829 } 830 // -- Clone QFunctionAssemblyData 831 CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled)); 832 833 // Multiplicity vector 834 if (op_restrict || op_prolong) { 835 CeedVector mult_e_vec; 836 CeedRestrictionType rstr_type; 837 838 CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type)); 839 CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED, 840 "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported"); 841 CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector"); 842 CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine)); 843 CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec)); 844 CeedCall(CeedVectorSetValue(mult_e_vec, 0.0)); 845 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE)); 846 CeedCall(CeedVectorSetValue(mult_vec, 0.0)); 847 CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE)); 848 CeedCall(CeedVectorDestroy(&mult_e_vec)); 849 CeedCall(CeedVectorReciprocal(mult_vec)); 850 } 851 852 // Clone name 853 bool has_name = op_fine->name; 854 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 855 CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name)); 856 857 // Check that coarse to fine basis is provided if prolong/restrict operators are requested 858 CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE, 859 "Prolongation or restriction operator creation requires coarse-to-fine basis"); 860 861 // Restriction/Prolongation Operators 862 CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp)); 863 864 // Restriction 865 if (op_restrict) { 866 CeedInt *num_comp_r_data; 867 CeedQFunctionContext ctx_r; 868 CeedQFunction qf_restrict; 869 870 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict)); 871 CeedCall(CeedCalloc(1, &num_comp_r_data)); 872 num_comp_r_data[0] = num_comp; 873 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r)); 874 CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data)); 875 CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r)); 876 CeedCall(CeedQFunctionContextDestroy(&ctx_r)); 877 CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE)); 878 CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE)); 879 CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP)); 880 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp)); 881 882 CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict)); 883 CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 884 CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 885 CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 886 887 // Set name 888 char *restriction_name; 889 890 CeedCall(CeedCalloc(17 + name_len, &restriction_name)); 891 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 892 CeedCall(CeedOperatorSetName(*op_restrict, restriction_name)); 893 CeedCall(CeedFree(&restriction_name)); 894 895 // Check 896 CeedCall(CeedOperatorCheckReady(*op_restrict)); 897 898 // Cleanup 899 CeedCall(CeedQFunctionDestroy(&qf_restrict)); 900 } 901 902 // Prolongation 903 if (op_prolong) { 904 CeedInt *num_comp_p_data; 905 CeedQFunctionContext ctx_p; 906 CeedQFunction qf_prolong; 907 908 CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong)); 909 CeedCall(CeedCalloc(1, &num_comp_p_data)); 910 num_comp_p_data[0] = num_comp; 911 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p)); 912 CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data)); 913 CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p)); 914 CeedCall(CeedQFunctionContextDestroy(&ctx_p)); 915 CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP)); 916 CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE)); 917 CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE)); 918 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp)); 919 920 CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong)); 921 CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE)); 922 CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec)); 923 CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE)); 924 925 // Set name 926 char *prolongation_name; 927 928 CeedCall(CeedCalloc(18 + name_len, &prolongation_name)); 929 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : ""); 930 CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name)); 931 CeedCall(CeedFree(&prolongation_name)); 932 933 // Check 934 CeedCall(CeedOperatorCheckReady(*op_prolong)); 935 936 // Cleanup 937 CeedCall(CeedQFunctionDestroy(&qf_prolong)); 938 } 939 940 // Check 941 CeedCall(CeedOperatorCheckReady(*op_coarse)); 942 943 // Cleanup 944 CeedCall(CeedVectorDestroy(&mult_vec)); 945 CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); 946 CeedCall(CeedBasisDestroy(&basis_c_to_f)); 947 return CEED_ERROR_SUCCESS; 948 } 949 950 /** 951 @brief Build 1D mass matrix and Laplacian with perturbation 952 953 @param[in] interp_1d Interpolation matrix in one dimension 954 @param[in] grad_1d Gradient matrix in one dimension 955 @param[in] q_weight_1d Quadrature weights in one dimension 956 @param[in] P_1d Number of basis nodes in one dimension 957 @param[in] Q_1d Number of quadrature points in one dimension 958 @param[in] dim Dimension of basis 959 @param[out] mass Assembled mass matrix in one dimension 960 @param[out] laplace Assembled perturbed Laplacian in one dimension 961 962 @return An error code: 0 - success, otherwise - failure 963 964 @ref Developer 965 **/ 966 CeedPragmaOptimizeOff 967 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d, 968 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) { 969 for (CeedInt i = 0; i < P_1d; i++) { 970 for (CeedInt j = 0; j < P_1d; j++) { 971 CeedScalar sum = 0.0; 972 for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j]; 973 mass[i + j * P_1d] = sum; 974 } 975 } 976 // -- Laplacian 977 for (CeedInt i = 0; i < P_1d; i++) { 978 for (CeedInt j = 0; j < P_1d; j++) { 979 CeedScalar sum = 0.0; 980 981 for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j]; 982 laplace[i + j * P_1d] = sum; 983 } 984 } 985 CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4; 986 for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation; 987 return CEED_ERROR_SUCCESS; 988 } 989 CeedPragmaOptimizeOn 990 991 /// @} 992 993 /// ---------------------------------------------------------------------------- 994 /// CeedOperator Backend API 995 /// ---------------------------------------------------------------------------- 996 /// @addtogroup CeedOperatorBackend 997 /// @{ 998 999 /** 1000 @brief Create point block restriction for active operator field 1001 1002 @param[in] rstr Original CeedElemRestriction for active field 1003 @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored 1004 1005 @return An error code: 0 - success, otherwise - failure 1006 1007 @ref Backend 1008 **/ 1009 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) { 1010 Ceed ceed; 1011 CeedInt num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets; 1012 CeedSize l_size; 1013 const CeedInt *offsets; 1014 1015 CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed)); 1016 CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets)); 1017 1018 // Expand offsets 1019 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 1020 CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 1021 CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 1022 CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 1023 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 1024 shift = num_comp; 1025 if (comp_stride != 1) shift *= num_comp; 1026 CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets)); 1027 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 1028 point_block_offsets[i] = offsets[i] * shift; 1029 } 1030 1031 // Create new restriction 1032 CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, 1033 point_block_offsets, point_block_rstr)); 1034 1035 // Cleanup 1036 CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets)); 1037 return CEED_ERROR_SUCCESS; 1038 } 1039 1040 /** 1041 @brief Create object holding CeedQFunction assembly data for CeedOperator 1042 1043 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1044 @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored 1045 1046 @return An error code: 0 - success, otherwise - failure 1047 1048 @ref Backend 1049 **/ 1050 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) { 1051 CeedCall(CeedCalloc(1, data)); 1052 (*data)->ref_count = 1; 1053 (*data)->ceed = ceed; 1054 CeedCall(CeedReference(ceed)); 1055 return CEED_ERROR_SUCCESS; 1056 } 1057 1058 /** 1059 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1060 1061 @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter 1062 1063 @return An error code: 0 - success, otherwise - failure 1064 1065 @ref Backend 1066 **/ 1067 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1068 data->ref_count++; 1069 return CEED_ERROR_SUCCESS; 1070 } 1071 1072 /** 1073 @brief Set re-use of CeedQFunctionAssemblyData 1074 1075 @param[in,out] data CeedQFunctionAssemblyData to mark for reuse 1076 @param[in] reuse_data Boolean flag indicating data re-use 1077 1078 @return An error code: 0 - success, otherwise - failure 1079 1080 @ref Backend 1081 **/ 1082 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) { 1083 data->reuse_data = reuse_data; 1084 data->needs_data_update = true; 1085 return CEED_ERROR_SUCCESS; 1086 } 1087 1088 /** 1089 @brief Mark QFunctionAssemblyData as stale 1090 1091 @param[in,out] data CeedQFunctionAssemblyData to mark as stale 1092 @param[in] needs_data_update Boolean flag indicating if update is needed or completed 1093 1094 @return An error code: 0 - success, otherwise - failure 1095 1096 @ref Backend 1097 **/ 1098 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) { 1099 data->needs_data_update = needs_data_update; 1100 return CEED_ERROR_SUCCESS; 1101 } 1102 1103 /** 1104 @brief Determine if QFunctionAssemblyData needs update 1105 1106 @param[in] data CeedQFunctionAssemblyData to mark as stale 1107 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1108 1109 @return An error code: 0 - success, otherwise - failure 1110 1111 @ref Backend 1112 **/ 1113 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) { 1114 *is_update_needed = !data->reuse_data || data->needs_data_update; 1115 return CEED_ERROR_SUCCESS; 1116 } 1117 1118 /** 1119 @brief Copy the pointer to a CeedQFunctionAssemblyData. 1120 1121 Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`. 1122 1123 Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a 1124 CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this 1125 CeedQFunctionAssemblyData. 1126 1127 @param[in] data CeedQFunctionAssemblyData to copy reference to 1128 @param[in,out] data_copy Variable to store copied reference 1129 1130 @return An error code: 0 - success, otherwise - failure 1131 1132 @ref Backend 1133 **/ 1134 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) { 1135 CeedCall(CeedQFunctionAssemblyDataReference(data)); 1136 CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy)); 1137 *data_copy = data; 1138 return CEED_ERROR_SUCCESS; 1139 } 1140 1141 /** 1142 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1143 1144 @param[in] data CeedQFunctionAssemblyData to retrieve status 1145 @param[out] is_setup Boolean flag for setup status 1146 1147 @return An error code: 0 - success, otherwise - failure 1148 1149 @ref Backend 1150 **/ 1151 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) { 1152 *is_setup = data->is_setup; 1153 return CEED_ERROR_SUCCESS; 1154 } 1155 1156 /** 1157 @brief Set internal objects for CeedQFunctionAssemblyData 1158 1159 @param[in,out] data CeedQFunctionAssemblyData to set objects 1160 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1161 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1162 1163 @return An error code: 0 - success, otherwise - failure 1164 1165 @ref Backend 1166 **/ 1167 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) { 1168 CeedCall(CeedVectorReferenceCopy(vec, &data->vec)); 1169 CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr)); 1170 1171 data->is_setup = true; 1172 return CEED_ERROR_SUCCESS; 1173 } 1174 1175 /** 1176 @brief Get internal objects for CeedQFunctionAssemblyData 1177 1178 @param[in,out] data CeedQFunctionAssemblyData to set objects 1179 @param[out] vec CeedVector to store assembled CeedQFunction at quadrature points 1180 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1181 1182 @return An error code: 0 - success, otherwise - failure 1183 1184 @ref Backend 1185 **/ 1186 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) { 1187 CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1188 1189 CeedCall(CeedVectorReferenceCopy(data->vec, vec)); 1190 CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr)); 1191 return CEED_ERROR_SUCCESS; 1192 } 1193 1194 /** 1195 @brief Destroy CeedQFunctionAssemblyData 1196 1197 @param[in,out] data CeedQFunctionAssemblyData to destroy 1198 1199 @return An error code: 0 - success, otherwise - failure 1200 1201 @ref Backend 1202 **/ 1203 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1204 if (!*data || --(*data)->ref_count > 0) { 1205 *data = NULL; 1206 return CEED_ERROR_SUCCESS; 1207 } 1208 CeedCall(CeedDestroy(&(*data)->ceed)); 1209 CeedCall(CeedVectorDestroy(&(*data)->vec)); 1210 CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr)); 1211 1212 CeedCall(CeedFree(data)); 1213 return CEED_ERROR_SUCCESS; 1214 } 1215 1216 /** 1217 @brief Get CeedOperatorAssemblyData 1218 1219 @param[in] op CeedOperator to assemble 1220 @param[out] data CeedQFunctionAssemblyData 1221 1222 @return An error code: 0 - success, otherwise - failure 1223 1224 @ref Backend 1225 **/ 1226 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) { 1227 if (!op->op_assembled) { 1228 CeedOperatorAssemblyData data; 1229 1230 CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data)); 1231 op->op_assembled = data; 1232 } 1233 *data = op->op_assembled; 1234 return CEED_ERROR_SUCCESS; 1235 } 1236 1237 /** 1238 @brief Create object holding CeedOperator assembly data. 1239 1240 The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator. 1241 An array with references to the corresponding active CeedElemRestrictions is also stored. 1242 For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis. 1243 The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each 1244 CeedEvalMode. 1245 The number of input columns across all active bases for the assembled CeedQFunction is also stored. 1246 Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes. 1247 1248 @param[in] ceed Ceed object where the CeedOperatorAssemblyData will be created 1249 @param[in] op CeedOperator to be assembled 1250 @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored 1251 1252 @return An error code: 0 - success, otherwise - failure 1253 1254 @ref Backend 1255 **/ 1256 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) { 1257 CeedInt num_active_bases_in = 0, num_active_bases_out = 0, offset = 0; 1258 CeedInt num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL; 1259 CeedSize **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL; 1260 CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL; 1261 CeedQFunctionField *qf_fields; 1262 CeedQFunction qf; 1263 CeedOperatorField *op_fields; 1264 bool is_composite; 1265 1266 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1267 CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators."); 1268 1269 // Allocate 1270 CeedCall(CeedCalloc(1, data)); 1271 (*data)->ceed = ceed; 1272 CeedCall(CeedReference(ceed)); 1273 1274 // Build OperatorAssembly data 1275 CeedCall(CeedOperatorGetQFunction(op, &qf)); 1276 CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL)); 1277 CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL)); 1278 1279 // Determine active input basis 1280 for (CeedInt i = 0; i < num_input_fields; i++) { 1281 CeedVector vec; 1282 1283 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1284 if (vec == CEED_VECTOR_ACTIVE) { 1285 CeedInt index = -1, num_comp, q_comp; 1286 CeedEvalMode eval_mode; 1287 CeedBasis basis_in = NULL; 1288 1289 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); 1290 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1291 CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp)); 1292 CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); 1293 for (CeedInt i = 0; i < num_active_bases_in; i++) { 1294 if ((*data)->active_bases_in[i] == basis_in) index = i; 1295 } 1296 if (index == -1) { 1297 CeedElemRestriction elem_rstr_in; 1298 1299 index = num_active_bases_in; 1300 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in)); 1301 (*data)->active_bases_in[num_active_bases_in] = NULL; 1302 CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in])); 1303 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in)); 1304 (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL; 1305 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); 1306 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in])); 1307 CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in)); 1308 num_eval_modes_in[index] = 0; 1309 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in)); 1310 eval_modes_in[index] = NULL; 1311 CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in)); 1312 eval_mode_offsets_in[index] = NULL; 1313 CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in)); 1314 (*data)->assembled_bases_in[index] = NULL; 1315 num_active_bases_in++; 1316 } 1317 if (eval_mode != CEED_EVAL_WEIGHT) { 1318 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1319 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index])); 1320 CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index])); 1321 for (CeedInt d = 0; d < q_comp; d++) { 1322 eval_modes_in[index][num_eval_modes_in[index] + d] = eval_mode; 1323 eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset; 1324 offset += num_comp; 1325 } 1326 num_eval_modes_in[index] += q_comp; 1327 } 1328 } 1329 } 1330 1331 // Determine active output basis 1332 CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields)); 1333 CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); 1334 offset = 0; 1335 for (CeedInt i = 0; i < num_output_fields; i++) { 1336 CeedVector vec; 1337 1338 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 1339 if (vec == CEED_VECTOR_ACTIVE) { 1340 CeedInt index = -1, num_comp, q_comp; 1341 CeedEvalMode eval_mode; 1342 CeedBasis basis_out = NULL; 1343 1344 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); 1345 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 1346 CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp)); 1347 CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); 1348 for (CeedInt i = 0; i < num_active_bases_out; i++) { 1349 if ((*data)->active_bases_out[i] == basis_out) index = i; 1350 } 1351 if (index == -1) { 1352 CeedElemRestriction elem_rstr_out; 1353 1354 index = num_active_bases_out; 1355 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out)); 1356 (*data)->active_bases_out[num_active_bases_out] = NULL; 1357 CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out])); 1358 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out)); 1359 (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL; 1360 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); 1361 CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out])); 1362 CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out)); 1363 num_eval_modes_out[index] = 0; 1364 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out)); 1365 eval_modes_out[index] = NULL; 1366 CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out)); 1367 eval_mode_offsets_out[index] = NULL; 1368 CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out)); 1369 (*data)->assembled_bases_out[index] = NULL; 1370 num_active_bases_out++; 1371 } 1372 if (eval_mode != CEED_EVAL_WEIGHT) { 1373 // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly 1374 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index])); 1375 CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index])); 1376 for (CeedInt d = 0; d < q_comp; d++) { 1377 eval_modes_out[index][num_eval_modes_out[index] + d] = eval_mode; 1378 eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset; 1379 offset += num_comp; 1380 } 1381 num_eval_modes_out[index] += q_comp; 1382 } 1383 } 1384 } 1385 (*data)->num_active_bases_in = num_active_bases_in; 1386 (*data)->num_eval_modes_in = num_eval_modes_in; 1387 (*data)->eval_modes_in = eval_modes_in; 1388 (*data)->eval_mode_offsets_in = eval_mode_offsets_in; 1389 (*data)->num_active_bases_out = num_active_bases_out; 1390 (*data)->num_eval_modes_out = num_eval_modes_out; 1391 (*data)->eval_modes_out = eval_modes_out; 1392 (*data)->eval_mode_offsets_out = eval_mode_offsets_out; 1393 (*data)->num_output_components = offset; 1394 return CEED_ERROR_SUCCESS; 1395 } 1396 1397 /** 1398 @brief Get CeedOperator CeedEvalModes for assembly. 1399 1400 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1401 1402 @param[in] data CeedOperatorAssemblyData 1403 @param[out] num_active_bases_in Total number of active bases for input 1404 @param[out] num_eval_modes_in Pointer to hold array of numbers of input CeedEvalModes, or NULL. 1405 `eval_modes_in[0]` holds an array of eval modes for the first active basis. 1406 @param[out] eval_modes_in Pointer to hold arrays of input CeedEvalModes, or NULL. 1407 @param[out] eval_mode_offsets_in Pointer to hold arrays of input offsets at each quadrature point. 1408 @param[out] num_active_bases_out Total number of active bases for output 1409 @param[out] num_eval_modes_out Pointer to hold array of numbers of output CeedEvalModes, or NULL 1410 @param[out] eval_modes_out Pointer to hold arrays of output CeedEvalModes, or NULL. 1411 @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point 1412 @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point, 1413 including contributions of all active bases 1414 1415 @return An error code: 0 - success, otherwise - failure 1416 1417 @ref Backend 1418 **/ 1419 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in, 1420 const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out, 1421 CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, 1422 CeedSize *num_output_components) { 1423 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1424 if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in; 1425 if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in; 1426 if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in; 1427 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1428 if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out; 1429 if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out; 1430 if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out; 1431 if (num_output_components) *num_output_components = data->num_output_components; 1432 return CEED_ERROR_SUCCESS; 1433 } 1434 1435 /** 1436 @brief Get CeedOperator CeedBasis data for assembly. 1437 1438 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1439 1440 @param[in] data CeedOperatorAssemblyData 1441 @param[out] num_active_bases_in Number of active input bases, or NULL 1442 @param[out] active_bases_in Pointer to hold active input CeedBasis, or NULL 1443 @param[out] assembled_bases_in Pointer to hold assembled active input B, or NULL 1444 @param[out] num_active_bases_out Number of active output bases, or NULL 1445 @param[out] active_bases_out Pointer to hold active output CeedBasis, or NULL 1446 @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL 1447 1448 @return An error code: 0 - success, otherwise - failure 1449 1450 @ref Backend 1451 **/ 1452 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in, 1453 const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out, 1454 const CeedScalar ***assembled_bases_out) { 1455 // Assemble B_in, B_out if needed 1456 if (assembled_bases_in && !data->assembled_bases_in[0]) { 1457 CeedInt num_qpts; 1458 1459 if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts)); 1460 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts)); 1461 for (CeedInt b = 0; b < data->num_active_bases_in; b++) { 1462 bool has_eval_none = false; 1463 CeedInt num_nodes; 1464 CeedScalar *B_in = NULL, *identity = NULL; 1465 1466 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes)); 1467 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in)); 1468 1469 for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) { 1470 has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE); 1471 } 1472 if (has_eval_none) { 1473 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1474 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1475 identity[i * num_nodes + i] = 1.0; 1476 } 1477 } 1478 1479 for (CeedInt q = 0; q < num_qpts; q++) { 1480 for (CeedInt n = 0; n < num_nodes; n++) { 1481 CeedInt d_in = 0, q_comp_in; 1482 CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE; 1483 1484 for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) { 1485 const CeedInt qq = data->num_eval_modes_in[b] * q; 1486 const CeedScalar *B = NULL; 1487 1488 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B)); 1489 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in)); 1490 if (q_comp_in > 1) { 1491 if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0; 1492 else B = &B[(++d_in) * num_qpts * num_nodes]; 1493 } 1494 eval_mode_in_prev = data->eval_modes_in[b][e_in]; 1495 B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n]; 1496 } 1497 } 1498 } 1499 if (identity) CeedCall(CeedFree(&identity)); 1500 data->assembled_bases_in[b] = B_in; 1501 } 1502 } 1503 1504 if (assembled_bases_out && !data->assembled_bases_out[0]) { 1505 CeedInt num_qpts; 1506 1507 if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts)); 1508 else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts)); 1509 for (CeedInt b = 0; b < data->num_active_bases_out; b++) { 1510 bool has_eval_none = false; 1511 CeedInt num_nodes; 1512 CeedScalar *B_out = NULL, *identity = NULL; 1513 1514 CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes)); 1515 CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out)); 1516 1517 for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) { 1518 has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE); 1519 } 1520 if (has_eval_none) { 1521 CeedCall(CeedCalloc(num_qpts * num_nodes, &identity)); 1522 for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) { 1523 identity[i * num_nodes + i] = 1.0; 1524 } 1525 } 1526 1527 for (CeedInt q = 0; q < num_qpts; q++) { 1528 for (CeedInt n = 0; n < num_nodes; n++) { 1529 CeedInt d_out = 0, q_comp_out; 1530 CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE; 1531 1532 for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) { 1533 const CeedInt qq = data->num_eval_modes_out[b] * q; 1534 const CeedScalar *B = NULL; 1535 1536 CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B)); 1537 CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out)); 1538 if (q_comp_out > 1) { 1539 if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0; 1540 else B = &B[(++d_out) * num_qpts * num_nodes]; 1541 } 1542 eval_mode_out_prev = data->eval_modes_out[b][e_out]; 1543 B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n]; 1544 } 1545 } 1546 } 1547 if (identity) CeedCall(CeedFree(&identity)); 1548 data->assembled_bases_out[b] = B_out; 1549 } 1550 } 1551 1552 // Pass out assembled data 1553 if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in; 1554 if (active_bases_in) *active_bases_in = data->active_bases_in; 1555 if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in; 1556 if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out; 1557 if (active_bases_out) *active_bases_out = data->active_bases_out; 1558 if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out; 1559 return CEED_ERROR_SUCCESS; 1560 } 1561 1562 /** 1563 @brief Get CeedOperator CeedBasis data for assembly. 1564 1565 Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object. 1566 1567 @param[in] data CeedOperatorAssemblyData 1568 @param[out] num_active_elem_rstrs_in Number of active input element restrictions, or NULL 1569 @param[out] active_elem_rstrs_in Pointer to hold active input CeedElemRestrictions, or NULL 1570 @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL 1571 @param[out] active_elem_rstrs_out Pointer to hold active output CeedElemRestrictions, or NULL 1572 1573 @return An error code: 0 - success, otherwise - failure 1574 1575 @ref Backend 1576 **/ 1577 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in, 1578 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out, 1579 CeedElemRestriction **active_elem_rstrs_out) { 1580 if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in; 1581 if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in; 1582 if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out; 1583 if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out; 1584 return CEED_ERROR_SUCCESS; 1585 } 1586 1587 /** 1588 @brief Destroy CeedOperatorAssemblyData 1589 1590 @param[in,out] data CeedOperatorAssemblyData to destroy 1591 1592 @return An error code: 0 - success, otherwise - failure 1593 1594 @ref Backend 1595 **/ 1596 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { 1597 if (!*data) { 1598 *data = NULL; 1599 return CEED_ERROR_SUCCESS; 1600 } 1601 CeedCall(CeedDestroy(&(*data)->ceed)); 1602 for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) { 1603 CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b])); 1604 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b])); 1605 CeedCall(CeedFree(&(*data)->eval_modes_in[b])); 1606 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b])); 1607 CeedCall(CeedFree(&(*data)->assembled_bases_in[b])); 1608 } 1609 for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) { 1610 CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b])); 1611 CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b])); 1612 CeedCall(CeedFree(&(*data)->eval_modes_out[b])); 1613 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b])); 1614 CeedCall(CeedFree(&(*data)->assembled_bases_out[b])); 1615 } 1616 CeedCall(CeedFree(&(*data)->active_bases_in)); 1617 CeedCall(CeedFree(&(*data)->active_bases_out)); 1618 CeedCall(CeedFree(&(*data)->active_elem_rstrs_in)); 1619 CeedCall(CeedFree(&(*data)->active_elem_rstrs_out)); 1620 CeedCall(CeedFree(&(*data)->num_eval_modes_in)); 1621 CeedCall(CeedFree(&(*data)->num_eval_modes_out)); 1622 CeedCall(CeedFree(&(*data)->eval_modes_in)); 1623 CeedCall(CeedFree(&(*data)->eval_modes_out)); 1624 CeedCall(CeedFree(&(*data)->eval_mode_offsets_in)); 1625 CeedCall(CeedFree(&(*data)->eval_mode_offsets_out)); 1626 CeedCall(CeedFree(&(*data)->assembled_bases_in)); 1627 CeedCall(CeedFree(&(*data)->assembled_bases_out)); 1628 1629 CeedCall(CeedFree(data)); 1630 return CEED_ERROR_SUCCESS; 1631 } 1632 1633 /** 1634 @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 1635 1636 @param[in] op CeedOperator to retrieve fallback for 1637 @param[out] op_fallback Fallback CeedOperator 1638 1639 @return An error code: 0 - success, otherwise - failure 1640 1641 @ref Backend 1642 **/ 1643 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 1644 // Create if needed 1645 if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op)); 1646 if (op->op_fallback) { 1647 bool is_debug; 1648 1649 CeedCall(CeedIsDebug(op->ceed, &is_debug)); 1650 if (is_debug) { 1651 Ceed ceed, ceed_fallback; 1652 const char *resource, *resource_fallback; 1653 1654 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1655 CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback)); 1656 CeedCall(CeedGetResource(ceed, &resource)); 1657 CeedCall(CeedGetResource(ceed_fallback, &resource_fallback)); 1658 1659 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n"); 1660 CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback, 1661 op->op_fallback); 1662 } 1663 } 1664 *op_fallback = op->op_fallback; 1665 return CEED_ERROR_SUCCESS; 1666 } 1667 1668 /** 1669 @brief Get the parent CeedOperator for a fallback CeedOperator 1670 1671 @param[in] op CeedOperator context 1672 @param[out] parent Variable to store parent CeedOperator context 1673 1674 @return An error code: 0 - success, otherwise - failure 1675 1676 @ref Backend 1677 **/ 1678 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) { 1679 *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL; 1680 return CEED_ERROR_SUCCESS; 1681 } 1682 1683 /** 1684 @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator 1685 1686 @param[in] op CeedOperator context 1687 @param[out] parent Variable to store parent Ceed context 1688 1689 @return An error code: 0 - success, otherwise - failure 1690 1691 @ref Backend 1692 **/ 1693 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) { 1694 *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed; 1695 return CEED_ERROR_SUCCESS; 1696 } 1697 1698 /// @} 1699 1700 /// ---------------------------------------------------------------------------- 1701 /// CeedOperator Public API 1702 /// ---------------------------------------------------------------------------- 1703 /// @addtogroup CeedOperatorUser 1704 /// @{ 1705 1706 /** 1707 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1708 1709 This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator. 1710 The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices 1711 representing the action of the CeedQFunction for a corresponding quadrature point on an element. 1712 1713 Inputs and outputs are in the order provided by the user when adding CeedOperator fields. 1714 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction 1715 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1716 1717 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1718 1719 @param[in] op CeedOperator to assemble CeedQFunction 1720 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1721 @param[out] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1722 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1723 1724 @return An error code: 0 - success, otherwise - failure 1725 1726 @ref User 1727 **/ 1728 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1729 CeedCall(CeedOperatorCheckReady(op)); 1730 1731 if (op->LinearAssembleQFunction) { 1732 // Backend version 1733 CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request)); 1734 } else { 1735 // Operator fallback 1736 CeedOperator op_fallback; 1737 1738 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1739 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request)); 1740 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction"); 1741 } 1742 return CEED_ERROR_SUCCESS; 1743 } 1744 1745 /** 1746 @brief Assemble CeedQFunction and store result internally. 1747 1748 Return copied references of stored data to the caller. 1749 Caller is responsible for ownership and destruction of the copied references. 1750 See also @ref CeedOperatorLinearAssembleQFunction 1751 1752 Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers. 1753 These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object. 1754 1755 @param[in] op CeedOperator to assemble CeedQFunction 1756 @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points 1757 @param[out] rstr CeedElemRestriction for CeedVector containing assembledCeedQFunction 1758 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1759 1760 @return An error code: 0 - success, otherwise - failure 1761 1762 @ref User 1763 **/ 1764 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1765 int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL; 1766 CeedOperator op_assemble = NULL; 1767 CeedOperator op_fallback_parent = NULL; 1768 1769 CeedCall(CeedOperatorCheckReady(op)); 1770 1771 // Determine if fallback parent or operator has implementation 1772 CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent)); 1773 if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) { 1774 // -- Backend version for op fallback parent is faster, if it exists 1775 LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate; 1776 op_assemble = op_fallback_parent; 1777 } else if (op->LinearAssembleQFunctionUpdate) { 1778 // -- Backend version for op 1779 LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate; 1780 op_assemble = op; 1781 } 1782 1783 // Assemble QFunction 1784 if (LinearAssembleQFunctionUpdate) { 1785 // Backend or fallback parent version 1786 bool qf_assembled_is_setup; 1787 CeedVector assembled_vec = NULL; 1788 CeedElemRestriction assembled_rstr = NULL; 1789 1790 CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup)); 1791 if (qf_assembled_is_setup) { 1792 bool update_needed; 1793 1794 CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr)); 1795 CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed)); 1796 if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request)); 1797 } else { 1798 CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request)); 1799 CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr)); 1800 } 1801 CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false)); 1802 1803 // Copy reference from internally held copy 1804 CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled)); 1805 CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr)); 1806 CeedCall(CeedVectorDestroy(&assembled_vec)); 1807 CeedCall(CeedElemRestrictionDestroy(&assembled_rstr)); 1808 } else { 1809 // Operator fallback 1810 CeedOperator op_fallback; 1811 1812 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1813 if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request)); 1814 else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1815 } 1816 return CEED_ERROR_SUCCESS; 1817 } 1818 1819 /** 1820 @brief Assemble the diagonal of a square linear CeedOperator 1821 1822 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1823 1824 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1825 1826 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1827 1828 @param[in] op CeedOperator to assemble CeedQFunction 1829 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1830 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1831 1832 @return An error code: 0 - success, otherwise - failure 1833 1834 @ref User 1835 **/ 1836 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1837 bool is_composite; 1838 CeedSize input_size = 0, output_size = 0; 1839 1840 CeedCall(CeedOperatorCheckReady(op)); 1841 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1842 1843 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1844 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1845 1846 // Early exit for empty operator 1847 if (!is_composite) { 1848 CeedInt num_elem = 0; 1849 1850 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1851 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1852 } 1853 1854 if (op->LinearAssembleDiagonal) { 1855 // Backend version 1856 CeedCall(op->LinearAssembleDiagonal(op, assembled, request)); 1857 return CEED_ERROR_SUCCESS; 1858 } else if (op->LinearAssembleAddDiagonal) { 1859 // Backend version with zeroing first 1860 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1861 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1862 return CEED_ERROR_SUCCESS; 1863 } else { 1864 // Operator fallback 1865 CeedOperator op_fallback; 1866 1867 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1868 if (op_fallback) { 1869 CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request)); 1870 return CEED_ERROR_SUCCESS; 1871 } 1872 } 1873 // Default interface implementation 1874 CeedCall(CeedVectorSetValue(assembled, 0.0)); 1875 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request)); 1876 return CEED_ERROR_SUCCESS; 1877 } 1878 1879 /** 1880 @brief Assemble the diagonal of a square linear CeedOperator 1881 1882 This sums into a CeedVector the diagonal of a linear CeedOperator. 1883 1884 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 1885 1886 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1887 1888 @param[in] op CeedOperator to assemble CeedQFunction 1889 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1890 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 1891 1892 @return An error code: 0 - success, otherwise - failure 1893 1894 @ref User 1895 **/ 1896 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 1897 bool is_composite; 1898 CeedSize input_size = 0, output_size = 0; 1899 1900 CeedCall(CeedOperatorCheckReady(op)); 1901 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1902 1903 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1904 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1905 1906 // Early exit for empty operator 1907 if (!is_composite) { 1908 CeedInt num_elem = 0; 1909 1910 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 1911 if (num_elem == 0) return CEED_ERROR_SUCCESS; 1912 } 1913 1914 if (op->LinearAssembleAddDiagonal) { 1915 // Backend version 1916 CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request)); 1917 return CEED_ERROR_SUCCESS; 1918 } else { 1919 // Operator fallback 1920 CeedOperator op_fallback; 1921 1922 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 1923 if (op_fallback) { 1924 CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request)); 1925 return CEED_ERROR_SUCCESS; 1926 } 1927 } 1928 // Default interface implementation 1929 if (is_composite) { 1930 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled)); 1931 } else { 1932 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled)); 1933 } 1934 return CEED_ERROR_SUCCESS; 1935 } 1936 1937 /** 1938 @brief Fully assemble the point-block diagonal pattern of a linear operator. 1939 1940 Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal(). 1941 1942 The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the 1943 matrix in entry (i, j). 1944 Note that the (i, j) pairs are unique. 1945 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in 1946 the same ordering. 1947 1948 This will generally be slow unless your operator is low-order. 1949 1950 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 1951 1952 @param[in] op CeedOperator to assemble 1953 @param[out] num_entries Number of entries in coordinate nonzero pattern 1954 @param[out] rows Row number for each entry 1955 @param[out] cols Column number for each entry 1956 1957 @ref User 1958 **/ 1959 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 1960 Ceed ceed; 1961 bool is_composite; 1962 CeedInt num_active_components, num_sub_operators; 1963 CeedOperator *sub_operators; 1964 1965 CeedCall(CeedOperatorGetCeed(op, &ceed)); 1966 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 1967 1968 CeedSize input_size = 0, output_size = 0; 1969 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 1970 CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1971 1972 if (is_composite) { 1973 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators)); 1974 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 1975 } else { 1976 sub_operators = &op; 1977 num_sub_operators = 1; 1978 } 1979 1980 // Verify operator can be assembled correctly 1981 { 1982 CeedOperatorAssemblyData data; 1983 CeedInt num_active_elem_rstrs, comp_stride; 1984 CeedElemRestriction *active_elem_rstrs; 1985 1986 // Get initial values to check against 1987 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data)); 1988 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 1989 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride)); 1990 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components)); 1991 1992 // Verify that all active element restrictions have same component stride and number of components 1993 for (CeedInt k = 0; k < num_sub_operators; k++) { 1994 CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data)); 1995 CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL)); 1996 for (CeedInt i = 0; i < num_active_elem_rstrs; i++) { 1997 CeedInt comp_stride_sub, num_active_components_sub; 1998 1999 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub)); 2000 CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION, 2001 "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub); 2002 CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub)); 2003 CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE, 2004 "All suboperators must have the same number of output components"); 2005 } 2006 } 2007 } 2008 *num_entries = input_size * num_active_components; 2009 CeedCall(CeedCalloc(*num_entries, rows)); 2010 CeedCall(CeedCalloc(*num_entries, cols)); 2011 2012 for (CeedInt o = 0; o < num_sub_operators; o++) { 2013 CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr; 2014 CeedInt comp_stride, num_elem, elem_size; 2015 const CeedInt *offsets, *point_block_offsets; 2016 2017 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr)); 2018 CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride)); 2019 CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem)); 2020 CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size)); 2021 CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets)); 2022 2023 CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr)); 2024 CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets)); 2025 2026 for (CeedSize i = 0; i < num_elem * elem_size; i++) { 2027 for (CeedInt c_out = 0; c_out < num_active_components; c_out++) { 2028 for (CeedInt c_in = 0; c_in < num_active_components; c_in++) { 2029 (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride; 2030 (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride; 2031 } 2032 } 2033 } 2034 2035 CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets)); 2036 CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets)); 2037 CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr)); 2038 } 2039 return CEED_ERROR_SUCCESS; 2040 } 2041 2042 /** 2043 @brief Assemble the point block diagonal of a square linear CeedOperator 2044 2045 This overwrites a CeedVector with the point block diagonal of a linear CeedOperator. 2046 2047 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2048 2049 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2050 2051 @param[in] op CeedOperator to assemble CeedQFunction 2052 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2053 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2054 component in]. 2055 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2056 2057 @return An error code: 0 - success, otherwise - failure 2058 2059 @ref User 2060 **/ 2061 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2062 bool is_composite; 2063 CeedSize input_size = 0, output_size = 0; 2064 2065 CeedCall(CeedOperatorCheckReady(op)); 2066 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2067 2068 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2069 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2070 2071 // Early exit for empty operator 2072 if (!is_composite) { 2073 CeedInt num_elem = 0; 2074 2075 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2076 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2077 } 2078 2079 if (op->LinearAssemblePointBlockDiagonal) { 2080 // Backend version 2081 CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request)); 2082 return CEED_ERROR_SUCCESS; 2083 } else if (op->LinearAssembleAddPointBlockDiagonal) { 2084 // Backend version with zeroing first 2085 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2086 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2087 return CEED_ERROR_SUCCESS; 2088 } else { 2089 // Operator fallback 2090 CeedOperator op_fallback; 2091 2092 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2093 if (op_fallback) { 2094 CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request)); 2095 return CEED_ERROR_SUCCESS; 2096 } 2097 } 2098 // Default interface implementation 2099 CeedCall(CeedVectorSetValue(assembled, 0.0)); 2100 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2101 return CEED_ERROR_SUCCESS; 2102 } 2103 2104 /** 2105 @brief Assemble the point block diagonal of a square linear CeedOperator 2106 2107 This sums into a CeedVector with the point block diagonal of a linear CeedOperator. 2108 2109 Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported. 2110 2111 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2112 2113 @param[in] op CeedOperator to assemble CeedQFunction 2114 @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp 2115 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out, 2116 component in]. 2117 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2118 2119 @return An error code: 0 - success, otherwise - failure 2120 2121 @ref User 2122 **/ 2123 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) { 2124 bool is_composite; 2125 CeedSize input_size = 0, output_size = 0; 2126 2127 CeedCall(CeedOperatorCheckReady(op)); 2128 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2129 2130 CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size)); 2131 CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 2132 2133 // Early exit for empty operator 2134 if (!is_composite) { 2135 CeedInt num_elem = 0; 2136 2137 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2138 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2139 } 2140 2141 if (op->LinearAssembleAddPointBlockDiagonal) { 2142 // Backend version 2143 CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request)); 2144 return CEED_ERROR_SUCCESS; 2145 } else { 2146 // Operator fallback 2147 CeedOperator op_fallback; 2148 2149 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2150 if (op_fallback) { 2151 CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request)); 2152 return CEED_ERROR_SUCCESS; 2153 } 2154 } 2155 // Default interface implementation 2156 if (is_composite) { 2157 CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled)); 2158 } else { 2159 CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled)); 2160 } 2161 return CEED_ERROR_SUCCESS; 2162 } 2163 2164 /** 2165 @brief Fully assemble the nonzero pattern of a linear operator. 2166 2167 Expected to be used in conjunction with CeedOperatorLinearAssemble(). 2168 2169 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2170 matrix in entry (i, j). 2171 Note that the (i, j) pairs are not unique and may repeat. 2172 This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering. 2173 2174 This will generally be slow unless your operator is low-order. 2175 2176 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2177 2178 @param[in] op CeedOperator to assemble 2179 @param[out] num_entries Number of entries in coordinate nonzero pattern 2180 @param[out] rows Row number for each entry 2181 @param[out] cols Column number for each entry 2182 2183 @ref User 2184 **/ 2185 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) { 2186 bool is_composite; 2187 CeedInt num_suboperators, offset = 0; 2188 CeedSize single_entries; 2189 CeedOperator *sub_operators; 2190 2191 CeedCall(CeedOperatorCheckReady(op)); 2192 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2193 2194 if (op->LinearAssembleSymbolic) { 2195 // Backend version 2196 CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols)); 2197 return CEED_ERROR_SUCCESS; 2198 } else { 2199 // Operator fallback 2200 CeedOperator op_fallback; 2201 2202 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2203 if (op_fallback) { 2204 CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols)); 2205 return CEED_ERROR_SUCCESS; 2206 } 2207 } 2208 2209 // Default interface implementation 2210 2211 // Count entries and allocate rows, cols arrays 2212 *num_entries = 0; 2213 if (is_composite) { 2214 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2215 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2216 for (CeedInt k = 0; k < num_suboperators; ++k) { 2217 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2218 *num_entries += single_entries; 2219 } 2220 } else { 2221 CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries)); 2222 *num_entries += single_entries; 2223 } 2224 CeedCall(CeedCalloc(*num_entries, rows)); 2225 CeedCall(CeedCalloc(*num_entries, cols)); 2226 2227 // Assemble nonzero locations 2228 if (is_composite) { 2229 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2230 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2231 for (CeedInt k = 0; k < num_suboperators; ++k) { 2232 CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols)); 2233 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2234 offset += single_entries; 2235 } 2236 } else { 2237 CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols)); 2238 } 2239 return CEED_ERROR_SUCCESS; 2240 } 2241 2242 /** 2243 @brief Fully assemble the nonzero entries of a linear operator. 2244 2245 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic(). 2246 2247 The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the 2248 matrix in entry (i, j). 2249 Note that the (i, j) pairs are not unique and may repeat. 2250 This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 2251 2252 This will generally be slow unless your operator is low-order. 2253 2254 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2255 2256 @param[in] op CeedOperator to assemble 2257 @param[out] values Values to assemble into matrix 2258 2259 @ref User 2260 **/ 2261 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 2262 bool is_composite; 2263 CeedInt num_suboperators, offset = 0; 2264 CeedSize single_entries = 0; 2265 CeedOperator *sub_operators; 2266 2267 CeedCall(CeedOperatorCheckReady(op)); 2268 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 2269 2270 // Early exit for empty operator 2271 if (!is_composite) { 2272 CeedInt num_elem = 0; 2273 2274 CeedCall(CeedOperatorGetNumElements(op, &num_elem)); 2275 if (num_elem == 0) return CEED_ERROR_SUCCESS; 2276 } 2277 2278 if (op->LinearAssemble) { 2279 // Backend version 2280 CeedCall(op->LinearAssemble(op, values)); 2281 return CEED_ERROR_SUCCESS; 2282 } else { 2283 // Operator fallback 2284 CeedOperator op_fallback; 2285 2286 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2287 if (op_fallback) { 2288 CeedCall(CeedOperatorLinearAssemble(op_fallback, values)); 2289 return CEED_ERROR_SUCCESS; 2290 } 2291 } 2292 2293 // Default interface implementation 2294 CeedCall(CeedVectorSetValue(values, 0.0)); 2295 if (is_composite) { 2296 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2297 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2298 for (CeedInt k = 0; k < num_suboperators; k++) { 2299 CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values)); 2300 CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries)); 2301 offset += single_entries; 2302 } 2303 } else { 2304 CeedCall(CeedSingleOperatorAssemble(op, offset, values)); 2305 } 2306 return CEED_ERROR_SUCCESS; 2307 } 2308 2309 /** 2310 @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator 2311 2312 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2313 2314 @param[in] op Composite CeedOperator 2315 @param[in] num_skip_indices Number of suboperators to skip 2316 @param[in] skip_indices Array of indices of suboperators to skip 2317 @param[out] mult Vector to store multiplicity (of size l_size) 2318 2319 @return An error code: 0 - success, otherwise - failure 2320 2321 @ref User 2322 **/ 2323 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) { 2324 Ceed ceed; 2325 CeedInt num_suboperators; 2326 CeedSize l_vec_len; 2327 CeedScalar *mult_array; 2328 CeedVector ones_l_vec; 2329 CeedElemRestriction elem_rstr, mult_elem_rstr; 2330 CeedOperator *sub_operators; 2331 2332 CeedCall(CeedOperatorCheckReady(op)); 2333 2334 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2335 2336 // Zero mult vector 2337 CeedCall(CeedVectorSetValue(mult, 0.0)); 2338 2339 // Get suboperators 2340 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 2341 CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators)); 2342 if (num_suboperators == 0) return CEED_ERROR_SUCCESS; 2343 2344 // Work vector 2345 CeedCall(CeedVectorGetLength(mult, &l_vec_len)); 2346 CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec)); 2347 CeedCall(CeedVectorSetValue(ones_l_vec, 1.0)); 2348 CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array)); 2349 2350 // Compute multiplicity across suboperators 2351 for (CeedInt i = 0; i < num_suboperators; i++) { 2352 const CeedScalar *sub_mult_array; 2353 CeedVector sub_mult_l_vec, ones_e_vec; 2354 2355 // -- Check for suboperator to skip 2356 for (CeedInt j = 0; j < num_skip_indices; j++) { 2357 if (skip_indices[j] == i) continue; 2358 } 2359 2360 // -- Sub operator multiplicity 2361 CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); 2362 CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); 2363 CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); 2364 CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); 2365 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); 2366 CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE)); 2367 CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array)); 2368 // ---- Flag every node present in the current suboperator 2369 for (CeedInt j = 0; j < l_vec_len; j++) { 2370 if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0; 2371 } 2372 CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array)); 2373 CeedCall(CeedVectorDestroy(&sub_mult_l_vec)); 2374 CeedCall(CeedVectorDestroy(&ones_e_vec)); 2375 CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr)); 2376 } 2377 CeedCall(CeedVectorRestoreArray(mult, &mult_array)); 2378 CeedCall(CeedVectorDestroy(&ones_l_vec)); 2379 return CEED_ERROR_SUCCESS; 2380 } 2381 2382 /** 2383 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse 2384 grid interpolation 2385 2386 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2387 2388 @param[in] op_fine Fine grid operator 2389 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2390 @param[in] rstr_coarse Coarse grid restriction 2391 @param[in] basis_coarse Coarse grid active vector basis 2392 @param[out] op_coarse Coarse grid operator 2393 @param[out] op_prolong Coarse to fine operator, or NULL 2394 @param[out] op_restrict Fine to coarse operator, or NULL 2395 2396 @return An error code: 0 - success, otherwise - failure 2397 2398 @ref User 2399 **/ 2400 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2401 CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) { 2402 CeedBasis basis_c_to_f = NULL; 2403 2404 CeedCall(CeedOperatorCheckReady(op_fine)); 2405 2406 // Build prolongation matrix, if required 2407 if (op_prolong || op_restrict) { 2408 CeedBasis basis_fine; 2409 2410 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2411 CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); 2412 } 2413 2414 // Core code 2415 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2416 return CEED_ERROR_SUCCESS; 2417 } 2418 2419 /** 2420 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis 2421 2422 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2423 2424 @param[in] op_fine Fine grid operator 2425 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2426 @param[in] rstr_coarse Coarse grid restriction 2427 @param[in] basis_coarse Coarse grid active vector basis 2428 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2429 @param[out] op_coarse Coarse grid operator 2430 @param[out] op_prolong Coarse to fine operator, or NULL 2431 @param[out] op_restrict Fine to coarse operator, or NULL 2432 2433 @return An error code: 0 - success, otherwise - failure 2434 2435 @ref User 2436 **/ 2437 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2438 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2439 CeedOperator *op_restrict) { 2440 Ceed ceed; 2441 CeedInt Q_f, Q_c; 2442 CeedBasis basis_fine, basis_c_to_f = NULL; 2443 2444 CeedCall(CeedOperatorCheckReady(op_fine)); 2445 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2446 2447 // Check for compatible quadrature spaces 2448 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2449 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2450 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2451 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2452 2453 // Create coarse to fine basis, if required 2454 if (op_prolong || op_restrict) { 2455 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2456 CeedScalar *q_ref, *q_weight, *grad; 2457 2458 // Check if interpolation matrix is provided 2459 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2460 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2461 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2462 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2463 CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); 2464 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2465 P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); 2466 CeedCall(CeedCalloc(P_1d_f, &q_ref)); 2467 CeedCall(CeedCalloc(P_1d_f, &q_weight)); 2468 CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad)); 2469 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2470 CeedCall(CeedFree(&q_ref)); 2471 CeedCall(CeedFree(&q_weight)); 2472 CeedCall(CeedFree(&grad)); 2473 } 2474 2475 // Core code 2476 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2477 return CEED_ERROR_SUCCESS; 2478 } 2479 2480 /** 2481 @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector 2482 2483 Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable. 2484 2485 @param[in] op_fine Fine grid operator 2486 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators 2487 @param[in] rstr_coarse Coarse grid restriction 2488 @param[in] basis_coarse Coarse grid active vector basis 2489 @param[in] interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators 2490 @param[out] op_coarse Coarse grid operator 2491 @param[out] op_prolong Coarse to fine operator, or NULL 2492 @param[out] op_restrict Fine to coarse operator, or NULL 2493 2494 @return An error code: 0 - success, otherwise - failure 2495 2496 @ref User 2497 **/ 2498 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2499 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 2500 CeedOperator *op_restrict) { 2501 Ceed ceed; 2502 CeedInt Q_f, Q_c; 2503 CeedBasis basis_fine, basis_c_to_f = NULL; 2504 2505 CeedCall(CeedOperatorCheckReady(op_fine)); 2506 CeedCall(CeedOperatorGetCeed(op_fine, &ceed)); 2507 2508 // Check for compatible quadrature spaces 2509 CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); 2510 CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f)); 2511 CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c)); 2512 CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 2513 2514 // Coarse to fine basis 2515 if (op_prolong || op_restrict) { 2516 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2517 CeedScalar *q_ref, *q_weight, *grad; 2518 CeedElemTopology topo; 2519 2520 // Check if interpolation matrix is provided 2521 CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE, 2522 "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix"); 2523 CeedCall(CeedBasisGetTopology(basis_fine, &topo)); 2524 CeedCall(CeedBasisGetDimension(basis_fine, &dim)); 2525 CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); 2526 CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); 2527 CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); 2528 CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); 2529 CeedCall(CeedCalloc(num_nodes_f, &q_weight)); 2530 CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad)); 2531 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f)); 2532 CeedCall(CeedFree(&q_ref)); 2533 CeedCall(CeedFree(&q_weight)); 2534 CeedCall(CeedFree(&grad)); 2535 } 2536 2537 // Core code 2538 CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict)); 2539 return CEED_ERROR_SUCCESS; 2540 } 2541 2542 /** 2543 @brief Build a FDM based approximate inverse for each element for a CeedOperator 2544 2545 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse. 2546 This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$. 2547 The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T 2548 \hat S V\f$. 2549 The CeedOperator must be linear and non-composite. 2550 The associated CeedQFunction must therefore also be linear. 2551 2552 Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable. 2553 2554 @param[in] op CeedOperator to create element inverses 2555 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element 2556 @param[in] request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE 2557 2558 @return An error code: 0 - success, otherwise - failure 2559 2560 @ref User 2561 **/ 2562 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) { 2563 Ceed ceed, ceed_parent; 2564 bool interp = false, grad = false, is_tensor_basis = true; 2565 CeedInt num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1; 2566 CeedSize l_size = 1; 2567 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg; 2568 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2569 CeedVector q_data; 2570 CeedElemRestriction rstr = NULL, rstr_qd_i; 2571 CeedBasis basis = NULL, fdm_basis; 2572 CeedQFunctionContext ctx_fdm; 2573 CeedQFunctionField *qf_fields; 2574 CeedQFunction qf, qf_fdm; 2575 CeedOperatorField *op_fields; 2576 2577 CeedCall(CeedOperatorCheckReady(op)); 2578 2579 if (op->CreateFDMElementInverse) { 2580 // Backend version 2581 CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request)); 2582 return CEED_ERROR_SUCCESS; 2583 } else { 2584 // Operator fallback 2585 CeedOperator op_fallback; 2586 2587 CeedCall(CeedOperatorGetFallback(op, &op_fallback)); 2588 if (op_fallback) { 2589 CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request)); 2590 return CEED_ERROR_SUCCESS; 2591 } 2592 } 2593 2594 // Default interface implementation 2595 CeedCall(CeedOperatorGetCeed(op, &ceed)); 2596 CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent)); 2597 CeedCall(CeedOperatorGetQFunction(op, &qf)); 2598 2599 // Determine active input basis 2600 CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL)); 2601 CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); 2602 for (CeedInt i = 0; i < num_input_fields; i++) { 2603 CeedVector vec; 2604 2605 CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec)); 2606 if (vec == CEED_VECTOR_ACTIVE) { 2607 CeedEvalMode eval_mode; 2608 2609 CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); 2610 interp = interp || eval_mode == CEED_EVAL_INTERP; 2611 grad = grad || eval_mode == CEED_EVAL_GRAD; 2612 CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); 2613 CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); 2614 } 2615 } 2616 CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); 2617 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 2618 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 2619 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2620 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 2621 CeedCall(CeedBasisGetDimension(basis, &dim)); 2622 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 2623 CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 2624 CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 2625 2626 // Build and diagonalize 1D Mass and Laplacian 2627 CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis)); 2628 CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases"); 2629 CeedCall(CeedCalloc(P_1d * P_1d, &mass)); 2630 CeedCall(CeedCalloc(P_1d * P_1d, &laplace)); 2631 CeedCall(CeedCalloc(P_1d * P_1d, &x)); 2632 CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp)); 2633 CeedCall(CeedCalloc(P_1d, &lambda)); 2634 // -- Build matrices 2635 CeedCall(CeedBasisGetInterp1D(basis, &interp_1d)); 2636 CeedCall(CeedBasisGetGrad1D(basis, &grad_1d)); 2637 CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d)); 2638 CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace)); 2639 2640 // -- Diagonalize 2641 CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d)); 2642 CeedCall(CeedFree(&mass)); 2643 CeedCall(CeedFree(&laplace)); 2644 for (CeedInt i = 0; i < P_1d; i++) { 2645 for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d]; 2646 } 2647 CeedCall(CeedFree(&x)); 2648 2649 { 2650 CeedInt layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0); 2651 CeedScalar max_norm = 0; 2652 const CeedScalar *assembled_array, *q_weight_array; 2653 CeedVector assembled = NULL, q_weight; 2654 CeedElemRestriction rstr_qf = NULL; 2655 2656 // Assemble QFunction 2657 CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); 2658 CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); 2659 CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); 2660 CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); 2661 2662 // Calculate element averages 2663 CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight)); 2664 CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight)); 2665 CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array)); 2666 CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array)); 2667 CeedCall(CeedCalloc(num_elem, &elem_avg)); 2668 const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON; 2669 2670 for (CeedInt e = 0; e < num_elem; e++) { 2671 CeedInt count = 0; 2672 2673 for (CeedInt q = 0; q < num_qpts; q++) { 2674 for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) { 2675 if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) { 2676 elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q]; 2677 count++; 2678 } 2679 } 2680 } 2681 if (count) { 2682 elem_avg[e] /= count; 2683 } else { 2684 elem_avg[e] = 1.0; 2685 } 2686 } 2687 CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array)); 2688 CeedCall(CeedVectorDestroy(&assembled)); 2689 CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array)); 2690 CeedCall(CeedVectorDestroy(&q_weight)); 2691 } 2692 2693 // Build FDM diagonal 2694 { 2695 CeedScalar *q_data_array, *fdm_diagonal; 2696 2697 CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal)); 2698 const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON; 2699 for (CeedInt c = 0; c < num_comp; c++) { 2700 for (CeedInt n = 0; n < num_nodes; n++) { 2701 if (interp) fdm_diagonal[c * num_nodes + n] = 1.0; 2702 if (grad) { 2703 for (CeedInt d = 0; d < dim; d++) { 2704 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2705 fdm_diagonal[c * num_nodes + n] += lambda[i]; 2706 } 2707 } 2708 if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound; 2709 } 2710 } 2711 CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data)); 2712 CeedCall(CeedVectorSetValue(q_data, 0.0)); 2713 CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array)); 2714 for (CeedInt e = 0; e < num_elem; e++) { 2715 for (CeedInt c = 0; c < num_comp; c++) { 2716 for (CeedInt n = 0; n < num_nodes; n++) 2717 q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]); 2718 } 2719 } 2720 CeedCall(CeedFree(&elem_avg)); 2721 CeedCall(CeedFree(&fdm_diagonal)); 2722 CeedCall(CeedVectorRestoreArray(q_data, &q_data_array)); 2723 } 2724 2725 // Setup FDM operator 2726 // -- Basis 2727 { 2728 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2729 2730 CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy)); 2731 CeedCall(CeedCalloc(P_1d, &q_ref_dummy)); 2732 CeedCall(CeedCalloc(P_1d, &q_weight_dummy)); 2733 CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis)); 2734 CeedCall(CeedFree(&fdm_interp)); 2735 CeedCall(CeedFree(&grad_dummy)); 2736 CeedCall(CeedFree(&q_ref_dummy)); 2737 CeedCall(CeedFree(&q_weight_dummy)); 2738 CeedCall(CeedFree(&lambda)); 2739 } 2740 2741 // -- Restriction 2742 { 2743 CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp}; 2744 CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i)); 2745 } 2746 2747 // -- QFunction 2748 CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm)); 2749 CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP)); 2750 CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE)); 2751 CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP)); 2752 CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp)); 2753 2754 // -- QFunction context 2755 { 2756 CeedInt *num_comp_data; 2757 2758 CeedCall(CeedCalloc(1, &num_comp_data)); 2759 num_comp_data[0] = num_comp; 2760 CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm)); 2761 CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data)); 2762 } 2763 CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm)); 2764 CeedCall(CeedQFunctionContextDestroy(&ctx_fdm)); 2765 2766 // -- Operator 2767 CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv)); 2768 CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2769 CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data)); 2770 CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE)); 2771 2772 // Cleanup 2773 CeedCall(CeedVectorDestroy(&q_data)); 2774 CeedCall(CeedBasisDestroy(&fdm_basis)); 2775 CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); 2776 CeedCall(CeedQFunctionDestroy(&qf_fdm)); 2777 return CEED_ERROR_SUCCESS; 2778 } 2779 2780 /// @} 2781