1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedOperator preconditioning interfaces 18 19 /// ---------------------------------------------------------------------------- 20 /// CeedOperator Library Internal Preconditioning Functions 21 /// ---------------------------------------------------------------------------- 22 /// @addtogroup CeedOperatorDeveloper 23 /// @{ 24 25 /** 26 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced 27 CeedOperator functionality 28 29 @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction 30 @param[in] qf CeedQFunction to create fallback for 31 @param[out] qf_fallback fallback CeedQFunction 32 33 @return An error code: 0 - success, otherwise - failure 34 35 @ref Developer 36 **/ 37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, 38 CeedQFunction *qf_fallback) { 39 int ierr; 40 41 // Check if NULL qf passed in 42 if (!qf) return CEED_ERROR_SUCCESS; 43 44 char *source_path_with_name = ""; 45 if (qf->source_path) { 46 size_t path_len = strlen(qf->source_path), 47 name_len = strlen(qf->kernel_name); 48 ierr = CeedCalloc(path_len + name_len + 2, &source_path_with_name); 49 CeedChk(ierr); 50 memcpy(source_path_with_name, qf->source_path, path_len); 51 memcpy(&source_path_with_name[path_len], ":", 1); 52 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 53 } else { 54 ierr = CeedCalloc(1, &source_path_with_name); CeedChk(ierr); 55 } 56 57 ierr = CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, 58 qf->function, source_path_with_name, 59 qf_fallback); CeedChk(ierr); 60 { 61 CeedQFunctionContext ctx; 62 63 ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr); 64 ierr = CeedQFunctionSetContext(*qf_fallback, ctx); CeedChk(ierr); 65 } 66 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 67 ierr = CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, 68 qf->input_fields[i]->size, 69 qf->input_fields[i]->eval_mode); CeedChk(ierr); 70 } 71 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 72 ierr = CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, 73 qf->output_fields[i]->size, 74 qf->output_fields[i]->eval_mode); CeedChk(ierr); 75 } 76 ierr = CeedFree(&source_path_with_name); CeedChk(ierr); 77 78 return CEED_ERROR_SUCCESS; 79 } 80 81 /** 82 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 83 CeedOperator functionality 84 85 @param op CeedOperator to create fallback for 86 87 @return An error code: 0 - success, otherwise - failure 88 89 @ref Developer 90 **/ 91 int CeedOperatorCreateFallback(CeedOperator op) { 92 int ierr; 93 Ceed ceed_fallback; 94 95 // Check not already created 96 if (op->op_fallback) return CEED_ERROR_SUCCESS; 97 98 // Fallback Ceed 99 ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr); 100 101 // Clone Op 102 CeedOperator op_fallback; 103 if (op->is_composite) { 104 ierr = CeedCompositeOperatorCreate(ceed_fallback, &op_fallback); 105 CeedChk(ierr); 106 for (CeedInt i = 0; i < op->num_suboperators; i++) { 107 ierr = CeedCompositeOperatorAddSub(op_fallback, op->sub_operators[i]); 108 CeedChk(ierr); 109 } 110 } else { 111 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 112 ierr = CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback); 113 CeedChk(ierr); 114 ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback); 115 CeedChk(ierr); 116 ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback); 117 CeedChk(ierr); 118 ierr = CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, 119 dqfT_fallback, &op_fallback); CeedChk(ierr); 120 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 121 ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, 122 op->input_fields[i]->elem_restr, 123 op->input_fields[i]->basis, 124 op->input_fields[i]->vec); CeedChk(ierr); 125 } 126 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 127 ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, 128 op->output_fields[i]->elem_restr, 129 op->output_fields[i]->basis, 130 op->output_fields[i]->vec); CeedChk(ierr); 131 } 132 ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 133 &op_fallback->qf_assembled); CeedChk(ierr); 134 if (op_fallback->num_qpts == 0) { 135 ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts); 136 CeedChk(ierr); 137 } 138 // Cleanup 139 ierr = CeedQFunctionDestroy(&qf_fallback); CeedChk(ierr); 140 ierr = CeedQFunctionDestroy(&dqf_fallback); CeedChk(ierr); 141 ierr = CeedQFunctionDestroy(&dqfT_fallback); CeedChk(ierr); 142 } 143 ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr); 144 ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr); 145 op->op_fallback = op_fallback; 146 147 return CEED_ERROR_SUCCESS; 148 } 149 150 /** 151 @brief Select correct basis matrix pointer based on CeedEvalMode 152 153 @param[in] eval_mode Current basis evaluation mode 154 @param[in] identity Pointer to identity matrix 155 @param[in] interp Pointer to interpolation matrix 156 @param[in] grad Pointer to gradient matrix 157 @param[out] basis_ptr Basis pointer to set 158 159 @ref Developer 160 **/ 161 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 162 const CeedScalar *identity, const CeedScalar *interp, 163 const CeedScalar *grad, const CeedScalar **basis_ptr) { 164 switch (eval_mode) { 165 case CEED_EVAL_NONE: 166 *basis_ptr = identity; 167 break; 168 case CEED_EVAL_INTERP: 169 *basis_ptr = interp; 170 break; 171 case CEED_EVAL_GRAD: 172 *basis_ptr = grad; 173 break; 174 case CEED_EVAL_WEIGHT: 175 case CEED_EVAL_DIV: 176 case CEED_EVAL_CURL: 177 break; // Caught by QF Assembly 178 } 179 } 180 181 /** 182 @brief Create point block restriction for active operator field 183 184 @param[in] rstr Original CeedElemRestriction for active field 185 @param[out] pointblock_rstr Address of the variable where the newly created 186 CeedElemRestriction will be stored 187 188 @return An error code: 0 - success, otherwise - failure 189 190 @ref Developer 191 **/ 192 static int CeedOperatorCreateActivePointBlockRestriction( 193 CeedElemRestriction rstr, 194 CeedElemRestriction *pointblock_rstr) { 195 int ierr; 196 Ceed ceed; 197 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 198 const CeedInt *offsets; 199 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 200 CeedChk(ierr); 201 202 // Expand offsets 203 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 204 *pointblock_offsets; 205 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 206 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 207 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 208 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 209 CeedInt shift = num_comp; 210 if (comp_stride != 1) 211 shift *= num_comp; 212 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 213 CeedChk(ierr); 214 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 215 pointblock_offsets[i] = offsets[i]*shift; 216 if (pointblock_offsets[i] > max) 217 max = pointblock_offsets[i]; 218 } 219 220 // Create new restriction 221 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 222 1, max + num_comp*num_comp, CEED_MEM_HOST, 223 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 224 CeedChk(ierr); 225 226 // Cleanup 227 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 228 229 return CEED_ERROR_SUCCESS; 230 } 231 232 /** 233 @brief Core logic for assembling operator diagonal or point block diagonal 234 235 @param[in] op CeedOperator to assemble point block diagonal 236 @param[in] request Address of CeedRequest for non-blocking completion, else 237 CEED_REQUEST_IMMEDIATE 238 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 239 @param[out] assembled CeedVector to store assembled diagonal 240 241 @return An error code: 0 - success, otherwise - failure 242 243 @ref Developer 244 **/ 245 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 246 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 247 int ierr; 248 Ceed ceed; 249 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 250 251 // Assemble QFunction 252 CeedQFunction qf; 253 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 254 CeedInt num_input_fields, num_output_fields; 255 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 256 CeedChk(ierr); 257 CeedVector assembled_qf; 258 CeedElemRestriction rstr; 259 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 260 &rstr, request); CeedChk(ierr); 261 CeedInt layout[3]; 262 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 263 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 264 CeedScalar max_norm = 0; 265 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 266 267 // Determine active input basis 268 CeedOperatorField *op_fields; 269 CeedQFunctionField *qf_fields; 270 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 271 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 272 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 273 CeedEvalMode *eval_mode_in = NULL; 274 CeedBasis basis_in = NULL; 275 CeedElemRestriction rstr_in = NULL; 276 for (CeedInt i=0; i<num_input_fields; i++) { 277 CeedVector vec; 278 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 279 if (vec == CEED_VECTOR_ACTIVE) { 280 CeedElemRestriction rstr; 281 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 282 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 283 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 284 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 285 if (rstr_in && rstr_in != rstr) 286 // LCOV_EXCL_START 287 return CeedError(ceed, CEED_ERROR_BACKEND, 288 "Multi-field non-composite operator diagonal assembly not supported"); 289 // LCOV_EXCL_STOP 290 rstr_in = rstr; 291 CeedEvalMode eval_mode; 292 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 293 switch (eval_mode) { 294 case CEED_EVAL_NONE: 295 case CEED_EVAL_INTERP: 296 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 297 eval_mode_in[num_eval_mode_in] = eval_mode; 298 num_eval_mode_in += 1; 299 break; 300 case CEED_EVAL_GRAD: 301 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 302 for (CeedInt d=0; d<dim; d++) 303 eval_mode_in[num_eval_mode_in+d] = eval_mode; 304 num_eval_mode_in += dim; 305 break; 306 case CEED_EVAL_WEIGHT: 307 case CEED_EVAL_DIV: 308 case CEED_EVAL_CURL: 309 break; // Caught by QF Assembly 310 } 311 } 312 } 313 314 // Determine active output basis 315 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 316 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 317 CeedInt num_eval_mode_out = 0; 318 CeedEvalMode *eval_mode_out = NULL; 319 CeedBasis basis_out = NULL; 320 CeedElemRestriction rstr_out = NULL; 321 for (CeedInt i=0; i<num_output_fields; i++) { 322 CeedVector vec; 323 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 324 if (vec == CEED_VECTOR_ACTIVE) { 325 CeedElemRestriction rstr; 326 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 327 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 328 if (rstr_out && rstr_out != rstr) 329 // LCOV_EXCL_START 330 return CeedError(ceed, CEED_ERROR_BACKEND, 331 "Multi-field non-composite operator diagonal assembly not supported"); 332 // LCOV_EXCL_STOP 333 rstr_out = rstr; 334 CeedEvalMode eval_mode; 335 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 336 switch (eval_mode) { 337 case CEED_EVAL_NONE: 338 case CEED_EVAL_INTERP: 339 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 340 eval_mode_out[num_eval_mode_out] = eval_mode; 341 num_eval_mode_out += 1; 342 break; 343 case CEED_EVAL_GRAD: 344 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 345 for (CeedInt d=0; d<dim; d++) 346 eval_mode_out[num_eval_mode_out+d] = eval_mode; 347 num_eval_mode_out += dim; 348 break; 349 case CEED_EVAL_WEIGHT: 350 case CEED_EVAL_DIV: 351 case CEED_EVAL_CURL: 352 break; // Caught by QF Assembly 353 } 354 } 355 } 356 357 // Assemble point block diagonal restriction, if needed 358 CeedElemRestriction diag_rstr = rstr_out; 359 if (is_pointblock) { 360 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 361 CeedChk(ierr); 362 } 363 364 // Create diagonal vector 365 CeedVector elem_diag; 366 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 367 CeedChk(ierr); 368 369 // Assemble element operator diagonals 370 CeedScalar *elem_diag_array; 371 const CeedScalar *assembled_qf_array; 372 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 373 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 374 CeedChk(ierr); 375 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 376 CeedChk(ierr); 377 CeedInt num_elem, num_nodes, num_qpts; 378 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 379 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 380 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 381 // Basis matrices 382 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 383 CeedScalar *identity = NULL; 384 bool evalNone = false; 385 for (CeedInt i=0; i<num_eval_mode_in; i++) 386 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 387 for (CeedInt i=0; i<num_eval_mode_out; i++) 388 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 389 if (evalNone) { 390 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 391 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 392 identity[i*num_nodes+i] = 1.0; 393 } 394 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 395 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 396 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 397 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 398 // Compute the diagonal of B^T D B 399 // Each element 400 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 401 for (CeedInt e=0; e<num_elem; e++) { 402 CeedInt d_out = -1; 403 // Each basis eval mode pair 404 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 405 const CeedScalar *bt = NULL; 406 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 407 d_out += 1; 408 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 409 &grad_out[d_out*num_qpts*num_nodes], &bt); 410 CeedInt d_in = -1; 411 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 412 const CeedScalar *b = NULL; 413 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 414 d_in += 1; 415 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 416 &grad_in[d_in*num_qpts*num_nodes], &b); 417 // Each component 418 for (CeedInt c_out=0; c_out<num_comp; c_out++) 419 // Each qpoint/node pair 420 for (CeedInt q=0; q<num_qpts; q++) 421 if (is_pointblock) { 422 // Point Block Diagonal 423 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 424 const CeedScalar qf_value = 425 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 426 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 427 if (fabs(qf_value) > qf_value_bound) 428 for (CeedInt n=0; n<num_nodes; n++) 429 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 430 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 431 } 432 } else { 433 // Diagonal Only 434 const CeedScalar qf_value = 435 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 436 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 437 if (fabs(qf_value) > qf_value_bound) 438 for (CeedInt n=0; n<num_nodes; n++) 439 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 440 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 441 } 442 } 443 } 444 } 445 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 446 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 447 CeedChk(ierr); 448 449 // Assemble local operator diagonal 450 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 451 assembled, request); CeedChk(ierr); 452 453 // Cleanup 454 if (is_pointblock) { 455 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 456 } 457 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 458 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 459 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 460 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 461 ierr = CeedFree(&identity); CeedChk(ierr); 462 463 return CEED_ERROR_SUCCESS; 464 } 465 466 /** 467 @brief Core logic for assembling composite operator diagonal 468 469 @param[in] op CeedOperator to assemble point block diagonal 470 @param[in] request Address of CeedRequest for non-blocking completion, else 471 CEED_REQUEST_IMMEDIATE 472 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 473 @param[out] assembled CeedVector to store assembled diagonal 474 475 @return An error code: 0 - success, otherwise - failure 476 477 @ref Developer 478 **/ 479 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 480 CeedOperator op, CeedRequest *request, const bool is_pointblock, 481 CeedVector assembled) { 482 int ierr; 483 CeedInt num_sub; 484 CeedOperator *suboperators; 485 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 486 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 487 for (CeedInt i = 0; i < num_sub; i++) { 488 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 489 is_pointblock, assembled); CeedChk(ierr); 490 } 491 return CEED_ERROR_SUCCESS; 492 } 493 494 /** 495 @brief Build nonzero pattern for non-composite operator 496 497 Users should generally use CeedOperatorLinearAssembleSymbolic() 498 499 @param[in] op CeedOperator to assemble nonzero pattern 500 @param[in] offset Offset for number of entries 501 @param[out] rows Row number for each entry 502 @param[out] cols Column number for each entry 503 504 @return An error code: 0 - success, otherwise - failure 505 506 @ref Developer 507 **/ 508 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 509 CeedInt *rows, CeedInt *cols) { 510 int ierr; 511 Ceed ceed = op->ceed; 512 if (op->is_composite) 513 // LCOV_EXCL_START 514 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 515 "Composite operator not supported"); 516 // LCOV_EXCL_STOP 517 518 CeedSize num_nodes; 519 ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr); 520 CeedElemRestriction rstr_in; 521 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 522 CeedInt num_elem, elem_size, num_comp; 523 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 524 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 525 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 526 CeedInt layout_er[3]; 527 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 528 529 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 530 531 // Determine elem_dof relation 532 CeedVector index_vec; 533 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 534 CeedScalar *array; 535 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 536 for (CeedInt i = 0; i < num_nodes; ++i) { 537 array[i] = i; 538 } 539 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 540 CeedVector elem_dof; 541 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 542 CeedChk(ierr); 543 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 544 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 545 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 546 const CeedScalar *elem_dof_a; 547 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 548 CeedChk(ierr); 549 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 550 551 // Determine i, j locations for element matrices 552 CeedInt count = 0; 553 for (CeedInt e = 0; e < num_elem; ++e) { 554 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 555 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 556 for (CeedInt i = 0; i < elem_size; ++i) { 557 for (CeedInt j = 0; j < elem_size; ++j) { 558 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 559 (comp_out)*layout_er[1] + e*layout_er[2]; 560 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 561 (comp_in)*layout_er[1] + e*layout_er[2]; 562 563 const CeedInt row = elem_dof_a[elem_dof_index_row]; 564 const CeedInt col = elem_dof_a[elem_dof_index_col]; 565 566 rows[offset + count] = row; 567 cols[offset + count] = col; 568 count++; 569 } 570 } 571 } 572 } 573 } 574 if (count != local_num_entries) 575 // LCOV_EXCL_START 576 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 577 // LCOV_EXCL_STOP 578 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 579 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 580 581 return CEED_ERROR_SUCCESS; 582 } 583 584 /** 585 @brief Assemble nonzero entries for non-composite operator 586 587 Users should generally use CeedOperatorLinearAssemble() 588 589 @param[in] op CeedOperator to assemble 590 @param[out] offset Offest for number of entries 591 @param[out] values Values to assemble into matrix 592 593 @return An error code: 0 - success, otherwise - failure 594 595 @ref Developer 596 **/ 597 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 598 CeedVector values) { 599 int ierr; 600 Ceed ceed = op->ceed; 601 if (op->is_composite) 602 // LCOV_EXCL_START 603 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 604 "Composite operator not supported"); 605 // LCOV_EXCL_STOP 606 607 // Assemble QFunction 608 CeedQFunction qf; 609 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 610 CeedVector assembled_qf; 611 CeedElemRestriction rstr_q; 612 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 613 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 614 615 CeedSize qf_length; 616 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 617 618 CeedInt num_input_fields, num_output_fields; 619 CeedOperatorField *input_fields; 620 CeedOperatorField *output_fields; 621 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 622 &num_output_fields, &output_fields); CeedChk(ierr); 623 624 // Determine active input basis 625 CeedQFunctionField *qf_fields; 626 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 627 CeedInt num_eval_mode_in = 0, dim = 1; 628 CeedEvalMode *eval_mode_in = NULL; 629 CeedBasis basis_in = NULL; 630 CeedElemRestriction rstr_in = NULL; 631 for (CeedInt i=0; i<num_input_fields; i++) { 632 CeedVector vec; 633 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 634 if (vec == CEED_VECTOR_ACTIVE) { 635 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 636 CeedChk(ierr); 637 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 638 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 639 CeedChk(ierr); 640 CeedEvalMode eval_mode; 641 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 642 CeedChk(ierr); 643 switch (eval_mode) { 644 case CEED_EVAL_NONE: 645 case CEED_EVAL_INTERP: 646 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 647 eval_mode_in[num_eval_mode_in] = eval_mode; 648 num_eval_mode_in += 1; 649 break; 650 case CEED_EVAL_GRAD: 651 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 652 for (CeedInt d=0; d<dim; d++) { 653 eval_mode_in[num_eval_mode_in+d] = eval_mode; 654 } 655 num_eval_mode_in += dim; 656 break; 657 case CEED_EVAL_WEIGHT: 658 case CEED_EVAL_DIV: 659 case CEED_EVAL_CURL: 660 break; // Caught by QF Assembly 661 } 662 } 663 } 664 665 // Determine active output basis 666 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 667 CeedInt num_eval_mode_out = 0; 668 CeedEvalMode *eval_mode_out = NULL; 669 CeedBasis basis_out = NULL; 670 CeedElemRestriction rstr_out = NULL; 671 for (CeedInt i=0; i<num_output_fields; i++) { 672 CeedVector vec; 673 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 674 if (vec == CEED_VECTOR_ACTIVE) { 675 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 676 CeedChk(ierr); 677 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 678 CeedChk(ierr); 679 CeedEvalMode eval_mode; 680 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 681 CeedChk(ierr); 682 switch (eval_mode) { 683 case CEED_EVAL_NONE: 684 case CEED_EVAL_INTERP: 685 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 686 eval_mode_out[num_eval_mode_out] = eval_mode; 687 num_eval_mode_out += 1; 688 break; 689 case CEED_EVAL_GRAD: 690 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 691 for (CeedInt d=0; d<dim; d++) { 692 eval_mode_out[num_eval_mode_out+d] = eval_mode; 693 } 694 num_eval_mode_out += dim; 695 break; 696 case CEED_EVAL_WEIGHT: 697 case CEED_EVAL_DIV: 698 case CEED_EVAL_CURL: 699 break; // Caught by QF Assembly 700 } 701 } 702 } 703 704 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 705 // LCOV_EXCL_START 706 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 707 "Cannot assemble operator with out inputs/outputs"); 708 // LCOV_EXCL_STOP 709 710 CeedInt num_elem, elem_size, num_qpts, num_comp; 711 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 712 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 713 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 714 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 715 716 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 717 718 // loop over elements and put in data structure 719 const CeedScalar *interp_in, *grad_in; 720 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 721 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 722 723 const CeedScalar *assembled_qf_array; 724 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 725 CeedChk(ierr); 726 727 CeedInt layout_qf[3]; 728 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 729 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 730 731 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 732 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 733 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 734 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 735 num_qpts]; // logically 3-tensor 736 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 737 CeedScalar elem_mat[elem_size * elem_size]; 738 CeedInt count = 0; 739 CeedScalar *vals; 740 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 741 for (CeedInt e = 0; e < num_elem; ++e) { 742 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 743 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 744 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 745 B_mat_in[ell] = 0.0; 746 } 747 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 748 B_mat_out[ell] = 0.0; 749 } 750 // Store block-diagonal D matrix as collection of small dense blocks 751 for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; 752 ++ell) { 753 D_mat[ell] = 0.0; 754 } 755 // form element matrix itself (for each block component) 756 for (CeedInt ell = 0; ell < elem_size*elem_size; ++ell) { 757 elem_mat[ell] = 0.0; 758 } 759 for (CeedInt q = 0; q < num_qpts; ++q) { 760 for (CeedInt n = 0; n < elem_size; ++n) { 761 CeedInt d_in = -1; 762 for (CeedInt e_in = 0; e_in < num_eval_mode_in; ++e_in) { 763 const CeedInt qq = num_eval_mode_in*q; 764 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 765 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 766 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 767 d_in += 1; 768 B_mat_in[(qq+e_in)*elem_size + n] += 769 grad_in[(d_in*num_qpts+q) * elem_size + n]; 770 } else { 771 // LCOV_EXCL_START 772 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 773 // LCOV_EXCL_STOP 774 } 775 } 776 CeedInt d_out = -1; 777 for (CeedInt e_out = 0; e_out < num_eval_mode_out; ++e_out) { 778 const CeedInt qq = num_eval_mode_out*q; 779 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 780 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 781 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 782 d_out += 1; 783 B_mat_out[(qq+e_out)*elem_size + n] += 784 grad_in[(d_out*num_qpts+q) * elem_size + n]; 785 } else { 786 // LCOV_EXCL_START 787 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 788 // LCOV_EXCL_STOP 789 } 790 } 791 } 792 for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) { 793 for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) { 794 const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej) 795 *num_comp 796 +comp_out; 797 const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 798 e*layout_qf[2]; 799 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 800 } 801 } 802 } 803 // Compute B^T*D 804 for (CeedInt ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 805 BTD[ell] = 0.0; 806 } 807 for (CeedInt j = 0; j<elem_size; ++j) { 808 for (CeedInt q = 0; q<num_qpts; ++q) { 809 const CeedInt qq = num_eval_mode_out*q; 810 for (CeedInt ei = 0; ei < num_eval_mode_in; ++ei) { 811 for (CeedInt ej = 0; ej < num_eval_mode_out; ++ej) { 812 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 813 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 814 } 815 } 816 } 817 } 818 819 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 820 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 821 822 // put element matrix in coordinate data structure 823 for (CeedInt i = 0; i < elem_size; ++i) { 824 for (CeedInt j = 0; j < elem_size; ++j) { 825 vals[offset + count] = elem_mat[i*elem_size + j]; 826 count++; 827 } 828 } 829 } 830 } 831 } 832 if (count != local_num_entries) 833 // LCOV_EXCL_START 834 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 835 // LCOV_EXCL_STOP 836 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 837 838 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 839 CeedChk(ierr); 840 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 841 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 842 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 843 844 return CEED_ERROR_SUCCESS; 845 } 846 847 /** 848 @brief Count number of entries for assembled CeedOperator 849 850 @param[in] op CeedOperator to assemble 851 @param[out] num_entries Number of entries in assembled representation 852 853 @return An error code: 0 - success, otherwise - failure 854 855 @ref Utility 856 **/ 857 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 858 CeedInt *num_entries) { 859 int ierr; 860 CeedElemRestriction rstr; 861 CeedInt num_elem, elem_size, num_comp; 862 863 if (op->is_composite) 864 // LCOV_EXCL_START 865 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 866 "Composite operator not supported"); 867 // LCOV_EXCL_STOP 868 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 869 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 870 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 871 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 872 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 873 874 return CEED_ERROR_SUCCESS; 875 } 876 877 /** 878 @brief Common code for creating a multigrid coarse operator and level 879 transfer operators for a CeedOperator 880 881 @param[in] op_fine Fine grid operator 882 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 883 @param[in] rstr_coarse Coarse grid restriction 884 @param[in] basis_coarse Coarse grid active vector basis 885 @param[in] basis_c_to_f Basis for coarse to fine interpolation 886 @param[out] op_coarse Coarse grid operator 887 @param[out] op_prolong Coarse to fine operator 888 @param[out] op_restrict Fine to coarse operator 889 890 @return An error code: 0 - success, otherwise - failure 891 892 @ref Developer 893 **/ 894 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 895 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 896 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 897 CeedOperator *op_restrict) { 898 int ierr; 899 Ceed ceed; 900 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 901 902 // Check for composite operator 903 bool is_composite; 904 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 905 if (is_composite) 906 // LCOV_EXCL_START 907 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 908 "Automatic multigrid setup for composite operators not supported"); 909 // LCOV_EXCL_STOP 910 911 // Coarse Grid 912 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 913 op_coarse); CeedChk(ierr); 914 CeedElemRestriction rstr_fine = NULL; 915 // -- Clone input fields 916 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 917 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 918 rstr_fine = op_fine->input_fields[i]->elem_restr; 919 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 920 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 921 CeedChk(ierr); 922 } else { 923 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 924 op_fine->input_fields[i]->elem_restr, 925 op_fine->input_fields[i]->basis, 926 op_fine->input_fields[i]->vec); CeedChk(ierr); 927 } 928 } 929 // -- Clone output fields 930 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 931 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 932 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 933 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 934 CeedChk(ierr); 935 } else { 936 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 937 op_fine->output_fields[i]->elem_restr, 938 op_fine->output_fields[i]->basis, 939 op_fine->output_fields[i]->vec); CeedChk(ierr); 940 } 941 } 942 // -- Clone QFunctionAssemblyData 943 ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, 944 &(*op_coarse)->qf_assembled); CeedChk(ierr); 945 946 // Multiplicity vector 947 CeedVector mult_vec, mult_e_vec; 948 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 949 CeedChk(ierr); 950 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 951 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 952 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 953 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 954 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 955 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 956 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 957 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 958 959 // Restriction 960 CeedInt num_comp; 961 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr); 962 CeedQFunction qf_restrict; 963 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 964 CeedChk(ierr); 965 CeedInt *num_comp_r_data; 966 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 967 num_comp_r_data[0] = num_comp; 968 CeedQFunctionContext ctx_r; 969 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 970 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 971 sizeof(*num_comp_r_data), num_comp_r_data); 972 CeedChk(ierr); 973 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 974 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 975 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 976 CeedChk(ierr); 977 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 978 CeedChk(ierr); 979 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 980 CEED_EVAL_INTERP); CeedChk(ierr); 981 ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr); 982 983 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 984 CEED_QFUNCTION_NONE, op_restrict); 985 CeedChk(ierr); 986 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 987 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 988 CeedChk(ierr); 989 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 990 CEED_BASIS_COLLOCATED, mult_vec); 991 CeedChk(ierr); 992 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 993 CEED_VECTOR_ACTIVE); CeedChk(ierr); 994 995 // Prolongation 996 CeedQFunction qf_prolong; 997 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 998 CeedChk(ierr); 999 CeedInt *num_comp_p_data; 1000 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 1001 num_comp_p_data[0] = num_comp; 1002 CeedQFunctionContext ctx_p; 1003 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 1004 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 1005 sizeof(*num_comp_p_data), num_comp_p_data); 1006 CeedChk(ierr); 1007 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 1008 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 1009 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 1010 CeedChk(ierr); 1011 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 1012 CeedChk(ierr); 1013 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 1014 CeedChk(ierr); 1015 ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr); 1016 1017 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 1018 CEED_QFUNCTION_NONE, op_prolong); 1019 CeedChk(ierr); 1020 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 1021 CEED_VECTOR_ACTIVE); CeedChk(ierr); 1022 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 1023 CEED_BASIS_COLLOCATED, mult_vec); 1024 CeedChk(ierr); 1025 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 1026 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 1027 CeedChk(ierr); 1028 1029 // Clone name 1030 bool has_name = op_fine->name; 1031 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 1032 ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr); 1033 { 1034 char *prolongation_name; 1035 ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr); 1036 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", 1037 op_fine->name); 1038 ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr); 1039 ierr = CeedFree(&prolongation_name); CeedChk(ierr); 1040 } 1041 { 1042 char *restriction_name; 1043 ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr); 1044 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", 1045 op_fine->name); 1046 ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr); 1047 ierr = CeedFree(&restriction_name); CeedChk(ierr); 1048 } 1049 1050 // Cleanup 1051 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 1052 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 1053 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 1054 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 1055 1056 return CEED_ERROR_SUCCESS; 1057 } 1058 1059 /** 1060 @brief Build 1D mass matrix and Laplacian with perturbation 1061 1062 @param[in] interp_1d Interpolation matrix in one dimension 1063 @param[in] grad_1d Gradient matrix in one dimension 1064 @param[in] q_weight_1d Quadrature weights in one dimension 1065 @param[in] P_1d Number of basis nodes in one dimension 1066 @param[in] Q_1d Number of quadrature points in one dimension 1067 @param[in] dim Dimension of basis 1068 @param[out] mass Assembled mass matrix in one dimension 1069 @param[out] laplace Assembled perturbed Laplacian in one dimension 1070 1071 @return An error code: 0 - success, otherwise - failure 1072 1073 @ref Developer 1074 **/ 1075 CeedPragmaOptimizeOff 1076 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 1077 const CeedScalar *grad_1d, 1078 const CeedScalar *q_weight_1d, CeedInt P_1d, 1079 CeedInt Q_1d, CeedInt dim, 1080 CeedScalar *mass, CeedScalar *laplace) { 1081 1082 for (CeedInt i=0; i<P_1d; i++) 1083 for (CeedInt j=0; j<P_1d; j++) { 1084 CeedScalar sum = 0.0; 1085 for (CeedInt k=0; k<Q_1d; k++) 1086 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 1087 mass[i+j*P_1d] = sum; 1088 } 1089 // -- Laplacian 1090 for (CeedInt i=0; i<P_1d; i++) 1091 for (CeedInt j=0; j<P_1d; j++) { 1092 CeedScalar sum = 0.0; 1093 for (CeedInt k=0; k<Q_1d; k++) 1094 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1095 laplace[i+j*P_1d] = sum; 1096 } 1097 CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4; 1098 for (CeedInt i=0; i<P_1d; i++) 1099 laplace[i+P_1d*i] += perturbation; 1100 return CEED_ERROR_SUCCESS; 1101 } 1102 CeedPragmaOptimizeOn 1103 1104 /// @} 1105 1106 /// ---------------------------------------------------------------------------- 1107 /// CeedOperator Backend API 1108 /// ---------------------------------------------------------------------------- 1109 /// @addtogroup CeedOperatorBackend 1110 /// @{ 1111 1112 /** 1113 @brief Create object holding CeedQFunction assembly data for CeedOperator 1114 1115 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1116 @param[out] data Address of the variable where the newly created 1117 CeedQFunctionAssemblyData will be stored 1118 1119 @return An error code: 0 - success, otherwise - failure 1120 1121 @ref Backend 1122 **/ 1123 int CeedQFunctionAssemblyDataCreate(Ceed ceed, 1124 CeedQFunctionAssemblyData *data) { 1125 int ierr; 1126 1127 ierr = CeedCalloc(1, data); CeedChk(ierr); 1128 (*data)->ref_count = 1; 1129 (*data)->ceed = ceed; 1130 ierr = CeedReference(ceed); CeedChk(ierr); 1131 1132 return CEED_ERROR_SUCCESS; 1133 } 1134 1135 /** 1136 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1137 1138 @param data CeedQFunctionAssemblyData to increment the reference counter 1139 1140 @return An error code: 0 - success, otherwise - failure 1141 1142 @ref Backend 1143 **/ 1144 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1145 data->ref_count++; 1146 return CEED_ERROR_SUCCESS; 1147 } 1148 1149 /** 1150 @brief Set re-use of CeedQFunctionAssemblyData 1151 1152 @param data CeedQFunctionAssemblyData to mark for reuse 1153 @param reuse_data Boolean flag indicating data re-use 1154 1155 @return An error code: 0 - success, otherwise - failure 1156 1157 @ref Backend 1158 **/ 1159 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, 1160 bool reuse_data) { 1161 data->reuse_data = reuse_data; 1162 data->needs_data_update = true; 1163 return CEED_ERROR_SUCCESS; 1164 } 1165 1166 /** 1167 @brief Mark QFunctionAssemblyData as stale 1168 1169 @param data CeedQFunctionAssemblyData to mark as stale 1170 @param needs_data_update Boolean flag indicating if update is needed or completed 1171 1172 @return An error code: 0 - success, otherwise - failure 1173 1174 @ref Backend 1175 **/ 1176 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, 1177 bool needs_data_update) { 1178 data->needs_data_update = needs_data_update; 1179 return CEED_ERROR_SUCCESS; 1180 } 1181 1182 /** 1183 @brief Determine if QFunctionAssemblyData needs update 1184 1185 @param[in] data CeedQFunctionAssemblyData to mark as stale 1186 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1187 1188 @return An error code: 0 - success, otherwise - failure 1189 1190 @ref Backend 1191 **/ 1192 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, 1193 bool *is_update_needed) { 1194 *is_update_needed = !data->reuse_data || data->needs_data_update; 1195 return CEED_ERROR_SUCCESS; 1196 } 1197 1198 /** 1199 @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should 1200 be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`; 1201 Note: If `*data_copy` is non-NULL, then it is assumed that 1202 `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This 1203 CeedQFunctionAssemblyData will be destroyed if `*data_copy` is 1204 the only reference to this CeedQFunctionAssemblyData. 1205 1206 @param data CeedQFunctionAssemblyData to copy reference to 1207 @param[out] data_copy Variable to store copied reference 1208 1209 @return An error code: 0 - success, otherwise - failure 1210 1211 @ref Backend 1212 **/ 1213 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, 1214 CeedQFunctionAssemblyData *data_copy) { 1215 int ierr; 1216 1217 ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr); 1218 ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr); 1219 *data_copy = data; 1220 return CEED_ERROR_SUCCESS; 1221 } 1222 1223 /** 1224 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1225 1226 @param[in] data CeedQFunctionAssemblyData to retreive status 1227 @param[out] is_setup Boolean flag for setup status 1228 1229 @return An error code: 0 - success, otherwise - failure 1230 1231 @ref Backend 1232 **/ 1233 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, 1234 bool *is_setup) { 1235 *is_setup = data->is_setup; 1236 return CEED_ERROR_SUCCESS; 1237 } 1238 1239 /** 1240 @brief Set internal objects for CeedQFunctionAssemblyData 1241 1242 @param[in] data CeedQFunctionAssemblyData to set objects 1243 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1244 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1245 1246 @return An error code: 0 - success, otherwise - failure 1247 1248 @ref Backend 1249 **/ 1250 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, 1251 CeedVector vec, CeedElemRestriction rstr) { 1252 int ierr; 1253 1254 ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr); 1255 ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr); 1256 1257 data->is_setup = true; 1258 return CEED_ERROR_SUCCESS; 1259 } 1260 1261 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, 1262 CeedVector *vec, CeedElemRestriction *rstr) { 1263 int ierr; 1264 1265 if (!data->is_setup) 1266 // LCOV_EXCL_START 1267 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, 1268 "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1269 // LCOV_EXCL_STOP 1270 1271 ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr); 1272 ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr); 1273 1274 return CEED_ERROR_SUCCESS; 1275 } 1276 1277 /** 1278 @brief Destroy CeedQFunctionAssemblyData 1279 1280 @param[out] data CeedQFunctionAssemblyData to destroy 1281 1282 @return An error code: 0 - success, otherwise - failure 1283 1284 @ref Backend 1285 **/ 1286 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1287 int ierr; 1288 1289 if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS; 1290 1291 ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr); 1292 ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr); 1293 ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr); 1294 1295 ierr = CeedFree(data); CeedChk(ierr); 1296 return CEED_ERROR_SUCCESS; 1297 } 1298 1299 /// @} 1300 1301 /// ---------------------------------------------------------------------------- 1302 /// CeedOperator Public API 1303 /// ---------------------------------------------------------------------------- 1304 /// @addtogroup CeedOperatorUser 1305 /// @{ 1306 1307 /** 1308 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1309 1310 This returns a CeedVector containing a matrix at each quadrature point 1311 providing the action of the CeedQFunction associated with the CeedOperator. 1312 The vector 'assembled' is of shape 1313 [num_elements, num_input_fields, num_output_fields, num_quad_points] 1314 and contains column-major matrices representing the action of the 1315 CeedQFunction for a corresponding quadrature point on an element. Inputs and 1316 outputs are in the order provided by the user when adding CeedOperator fields. 1317 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1318 'v', provided in that order, would result in an assembled QFunction that 1319 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1320 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1321 1322 Note: Calling this function asserts that setup is complete 1323 and sets the CeedOperator as immutable. 1324 1325 @param op CeedOperator to assemble CeedQFunction 1326 @param[out] assembled CeedVector to store assembled CeedQFunction at 1327 quadrature points 1328 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1329 CeedQFunction 1330 @param request Address of CeedRequest for non-blocking completion, else 1331 @ref CEED_REQUEST_IMMEDIATE 1332 1333 @return An error code: 0 - success, otherwise - failure 1334 1335 @ref User 1336 **/ 1337 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1338 CeedElemRestriction *rstr, 1339 CeedRequest *request) { 1340 int ierr; 1341 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1342 1343 // Backend version 1344 if (op->LinearAssembleQFunction) { 1345 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1346 CeedChk(ierr); 1347 } else { 1348 // Fallback to reference Ceed 1349 if (!op->op_fallback) { 1350 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1351 } 1352 // Assemble 1353 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1354 rstr, request); CeedChk(ierr); 1355 } 1356 return CEED_ERROR_SUCCESS; 1357 } 1358 1359 /** 1360 @brief Assemble CeedQFunction and store result internall. Return copied 1361 references of stored data to the caller. Caller is responsible for 1362 ownership and destruction of the copied references. See also 1363 @ref CeedOperatorLinearAssembleQFunction 1364 1365 @param op CeedOperator to assemble CeedQFunction 1366 @param assembled CeedVector to store assembled CeedQFunction at 1367 quadrature points 1368 @param rstr CeedElemRestriction for CeedVector containing assembled 1369 CeedQFunction 1370 @param request Address of CeedRequest for non-blocking completion, else 1371 @ref CEED_REQUEST_IMMEDIATE 1372 1373 @return An error code: 0 - success, otherwise - failure 1374 1375 @ref User 1376 **/ 1377 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, 1378 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1379 int ierr; 1380 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1381 1382 // Backend version 1383 if (op->LinearAssembleQFunctionUpdate) { 1384 bool qf_assembled_is_setup; 1385 CeedVector assembled_vec = NULL; 1386 CeedElemRestriction assembled_rstr = NULL; 1387 1388 ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, 1389 &qf_assembled_is_setup); CeedChk(ierr); 1390 if (qf_assembled_is_setup) { 1391 ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, 1392 &assembled_rstr); CeedChk(ierr); 1393 1394 bool update_needed; 1395 ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, 1396 &update_needed); CeedChk(ierr); 1397 if (update_needed) { 1398 ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, 1399 request); CeedChk(ierr); 1400 } 1401 } else { 1402 ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, 1403 request); CeedChk(ierr); 1404 ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, 1405 assembled_rstr); CeedChk(ierr); 1406 } 1407 ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false); 1408 CeedChk(ierr); 1409 1410 // Copy reference to internally held copy 1411 *assembled = NULL; 1412 *rstr = NULL; 1413 ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr); 1414 ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr); 1415 ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr); 1416 ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr); 1417 } else { 1418 // Fallback to reference Ceed 1419 if (!op->op_fallback) { 1420 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1421 } 1422 // Assemble 1423 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback, 1424 assembled, rstr, request); CeedChk(ierr); 1425 } 1426 1427 return CEED_ERROR_SUCCESS; 1428 } 1429 1430 /** 1431 @brief Assemble the diagonal of a square linear CeedOperator 1432 1433 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1434 1435 Note: Currently only non-composite CeedOperators with a single field and 1436 composite CeedOperators with single field sub-operators are supported. 1437 1438 Note: Calling this function asserts that setup is complete 1439 and sets the CeedOperator as immutable. 1440 1441 @param op CeedOperator to assemble CeedQFunction 1442 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1443 @param request Address of CeedRequest for non-blocking completion, else 1444 @ref CEED_REQUEST_IMMEDIATE 1445 1446 @return An error code: 0 - success, otherwise - failure 1447 1448 @ref User 1449 **/ 1450 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, 1451 CeedRequest *request) { 1452 int ierr; 1453 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1454 1455 CeedSize input_size = 0, output_size = 0; 1456 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1457 CeedChk(ierr); 1458 if (input_size != output_size) 1459 // LCOV_EXCL_START 1460 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1461 // LCOV_EXCL_STOP 1462 1463 // Use backend version, if available 1464 if (op->LinearAssembleDiagonal) { 1465 ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); 1466 return CEED_ERROR_SUCCESS; 1467 } else if (op->LinearAssembleAddDiagonal) { 1468 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1469 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1470 return CEED_ERROR_SUCCESS; 1471 } else { 1472 // Check for valid fallback resource 1473 const char *resource, *fallback_resource; 1474 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1475 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1476 CeedChk(ierr); 1477 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1478 // Fallback to reference Ceed 1479 if (!op->op_fallback) { 1480 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1481 } 1482 // Assemble 1483 ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request); 1484 CeedChk(ierr); 1485 return CEED_ERROR_SUCCESS; 1486 } 1487 } 1488 1489 // Default interface implementation 1490 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1491 ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); 1492 CeedChk(ierr); 1493 return CEED_ERROR_SUCCESS; 1494 } 1495 1496 /** 1497 @brief Assemble the diagonal of a square linear CeedOperator 1498 1499 This sums into a CeedVector the diagonal of a linear CeedOperator. 1500 1501 Note: Currently only non-composite CeedOperators with a single field and 1502 composite CeedOperators with single field sub-operators are supported. 1503 1504 Note: Calling this function asserts that setup is complete 1505 and sets the CeedOperator as immutable. 1506 1507 @param op CeedOperator to assemble CeedQFunction 1508 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1509 @param request Address of CeedRequest for non-blocking completion, else 1510 @ref CEED_REQUEST_IMMEDIATE 1511 1512 @return An error code: 0 - success, otherwise - failure 1513 1514 @ref User 1515 **/ 1516 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1517 CeedRequest *request) { 1518 int ierr; 1519 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1520 1521 CeedSize input_size = 0, output_size = 0; 1522 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1523 CeedChk(ierr); 1524 if (input_size != output_size) 1525 // LCOV_EXCL_START 1526 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1527 // LCOV_EXCL_STOP 1528 1529 // Use backend version, if available 1530 if (op->LinearAssembleAddDiagonal) { 1531 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1532 return CEED_ERROR_SUCCESS; 1533 } else { 1534 // Check for valid fallback resource 1535 const char *resource, *fallback_resource; 1536 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1537 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1538 CeedChk(ierr); 1539 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1540 // Fallback to reference Ceed 1541 if (!op->op_fallback) { 1542 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1543 } 1544 // Assemble 1545 ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, 1546 request); CeedChk(ierr); 1547 return CEED_ERROR_SUCCESS; 1548 } 1549 } 1550 1551 // Default interface implementation 1552 bool is_composite; 1553 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1554 if (is_composite) { 1555 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1556 false, assembled); CeedChk(ierr); 1557 return CEED_ERROR_SUCCESS; 1558 } else { 1559 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1560 CeedChk(ierr); 1561 return CEED_ERROR_SUCCESS; 1562 } 1563 } 1564 1565 /** 1566 @brief Assemble the point block diagonal of a square linear CeedOperator 1567 1568 This overwrites a CeedVector with the point block diagonal of a linear 1569 CeedOperator. 1570 1571 Note: Currently only non-composite CeedOperators with a single field and 1572 composite CeedOperators with single field sub-operators are supported. 1573 1574 Note: Calling this function asserts that setup is complete 1575 and sets the CeedOperator as immutable. 1576 1577 @param op CeedOperator to assemble CeedQFunction 1578 @param[out] assembled CeedVector to store assembled CeedOperator point block 1579 diagonal, provided in row-major form with an 1580 @a num_comp * @a num_comp block at each node. The dimensions 1581 of this vector are derived from the active vector 1582 for the CeedOperator. The array has shape 1583 [nodes, component out, component in]. 1584 @param request Address of CeedRequest for non-blocking completion, else 1585 CEED_REQUEST_IMMEDIATE 1586 1587 @return An error code: 0 - success, otherwise - failure 1588 1589 @ref User 1590 **/ 1591 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, 1592 CeedVector assembled, CeedRequest *request) { 1593 int ierr; 1594 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1595 1596 CeedSize input_size = 0, output_size = 0; 1597 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1598 CeedChk(ierr); 1599 if (input_size != output_size) 1600 // LCOV_EXCL_START 1601 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1602 // LCOV_EXCL_STOP 1603 1604 // Use backend version, if available 1605 if (op->LinearAssemblePointBlockDiagonal) { 1606 ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); 1607 CeedChk(ierr); 1608 return CEED_ERROR_SUCCESS; 1609 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1610 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1611 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, 1612 request); CeedChk(ierr); 1613 return CEED_ERROR_SUCCESS; 1614 } else { 1615 // Check for valid fallback resource 1616 const char *resource, *fallback_resource; 1617 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1618 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1619 CeedChk(ierr); 1620 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1621 // Fallback to reference Ceed 1622 if (!op->op_fallback) { 1623 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1624 } 1625 // Assemble 1626 ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback, 1627 assembled, request); CeedChk(ierr); 1628 return CEED_ERROR_SUCCESS; 1629 } 1630 } 1631 1632 // Default interface implementation 1633 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1634 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); 1635 CeedChk(ierr); 1636 return CEED_ERROR_SUCCESS; 1637 } 1638 1639 /** 1640 @brief Assemble the point block diagonal of a square linear CeedOperator 1641 1642 This sums into a CeedVector with the point block diagonal of a linear 1643 CeedOperator. 1644 1645 Note: Currently only non-composite CeedOperators with a single field and 1646 composite CeedOperators with single field sub-operators are supported. 1647 1648 Note: Calling this function asserts that setup is complete 1649 and sets the CeedOperator as immutable. 1650 1651 @param op CeedOperator to assemble CeedQFunction 1652 @param[out] assembled CeedVector to store assembled CeedOperator point block 1653 diagonal, provided in row-major form with an 1654 @a num_comp * @a num_comp block at each node. The dimensions 1655 of this vector are derived from the active vector 1656 for the CeedOperator. The array has shape 1657 [nodes, component out, component in]. 1658 @param request Address of CeedRequest for non-blocking completion, else 1659 CEED_REQUEST_IMMEDIATE 1660 1661 @return An error code: 0 - success, otherwise - failure 1662 1663 @ref User 1664 **/ 1665 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, 1666 CeedVector assembled, CeedRequest *request) { 1667 int ierr; 1668 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1669 1670 CeedSize input_size = 0, output_size = 0; 1671 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1672 CeedChk(ierr); 1673 if (input_size != output_size) 1674 // LCOV_EXCL_START 1675 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1676 // LCOV_EXCL_STOP 1677 1678 // Use backend version, if available 1679 if (op->LinearAssembleAddPointBlockDiagonal) { 1680 ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); 1681 CeedChk(ierr); 1682 return CEED_ERROR_SUCCESS; 1683 } else { 1684 // Check for valid fallback resource 1685 const char *resource, *fallback_resource; 1686 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1687 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1688 CeedChk(ierr); 1689 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1690 // Fallback to reference Ceed 1691 if (!op->op_fallback) { 1692 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1693 } 1694 // Assemble 1695 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1696 assembled, request); CeedChk(ierr); 1697 return CEED_ERROR_SUCCESS; 1698 } 1699 } 1700 1701 // Default interface implemenation 1702 bool is_composite; 1703 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1704 if (is_composite) { 1705 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1706 true, assembled); CeedChk(ierr); 1707 return CEED_ERROR_SUCCESS; 1708 } else { 1709 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1710 CeedChk(ierr); 1711 return CEED_ERROR_SUCCESS; 1712 } 1713 } 1714 1715 /** 1716 @brief Fully assemble the nonzero pattern of a linear operator. 1717 1718 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1719 1720 The assembly routines use coordinate format, with num_entries tuples of the 1721 form (i, j, value) which indicate that value should be added to the matrix 1722 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1723 This function returns the number of entries and their (i, j) locations, 1724 while CeedOperatorLinearAssemble() provides the values in the same 1725 ordering. 1726 1727 This will generally be slow unless your operator is low-order. 1728 1729 Note: Calling this function asserts that setup is complete 1730 and sets the CeedOperator as immutable. 1731 1732 @param[in] op CeedOperator to assemble 1733 @param[out] num_entries Number of entries in coordinate nonzero pattern 1734 @param[out] rows Row number for each entry 1735 @param[out] cols Column number for each entry 1736 1737 @ref User 1738 **/ 1739 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, 1740 CeedInt **rows, CeedInt **cols) { 1741 int ierr; 1742 CeedInt num_suboperators, single_entries; 1743 CeedOperator *sub_operators; 1744 bool is_composite; 1745 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1746 1747 // Use backend version, if available 1748 if (op->LinearAssembleSymbolic) { 1749 ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr); 1750 return CEED_ERROR_SUCCESS; 1751 } else { 1752 // Check for valid fallback resource 1753 const char *resource, *fallback_resource; 1754 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1755 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1756 CeedChk(ierr); 1757 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1758 // Fallback to reference Ceed 1759 if (!op->op_fallback) { 1760 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1761 } 1762 // Assemble 1763 ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows, 1764 cols); CeedChk(ierr); 1765 return CEED_ERROR_SUCCESS; 1766 } 1767 } 1768 1769 // Default interface implementation 1770 1771 // count entries and allocate rows, cols arrays 1772 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1773 *num_entries = 0; 1774 if (is_composite) { 1775 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1776 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1777 for (CeedInt k = 0; k < num_suboperators; ++k) { 1778 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1779 &single_entries); CeedChk(ierr); 1780 *num_entries += single_entries; 1781 } 1782 } else { 1783 ierr = CeedSingleOperatorAssemblyCountEntries(op, 1784 &single_entries); CeedChk(ierr); 1785 *num_entries += single_entries; 1786 } 1787 ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr); 1788 ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr); 1789 1790 // assemble nonzero locations 1791 CeedInt offset = 0; 1792 if (is_composite) { 1793 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1794 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1795 for (CeedInt k = 0; k < num_suboperators; ++k) { 1796 ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, 1797 *cols); CeedChk(ierr); 1798 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1799 &single_entries); 1800 CeedChk(ierr); 1801 offset += single_entries; 1802 } 1803 } else { 1804 ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols); 1805 CeedChk(ierr); 1806 } 1807 1808 return CEED_ERROR_SUCCESS; 1809 } 1810 1811 /** 1812 @brief Fully assemble the nonzero entries of a linear operator. 1813 1814 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic() 1815 1816 The assembly routines use coordinate format, with num_entries tuples of the 1817 form (i, j, value) which indicate that value should be added to the matrix 1818 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1819 This function returns the values of the nonzero entries to be added, their 1820 (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1821 1822 This will generally be slow unless your operator is low-order. 1823 1824 Note: Calling this function asserts that setup is complete 1825 and sets the CeedOperator as immutable. 1826 1827 @param[in] op CeedOperator to assemble 1828 @param[out] values Values to assemble into matrix 1829 1830 @ref User 1831 **/ 1832 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1833 int ierr; 1834 CeedInt num_suboperators, single_entries = 0; 1835 CeedOperator *sub_operators; 1836 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1837 1838 // Use backend version, if available 1839 if (op->LinearAssemble) { 1840 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1841 return CEED_ERROR_SUCCESS; 1842 } else { 1843 // Check for valid fallback resource 1844 const char *resource, *fallback_resource; 1845 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1846 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1847 CeedChk(ierr); 1848 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1849 // Fallback to reference Ceed 1850 if (!op->op_fallback) { 1851 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1852 } 1853 // Assemble 1854 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1855 return CEED_ERROR_SUCCESS; 1856 } 1857 } 1858 1859 // Default interface implementation 1860 bool is_composite; 1861 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1862 1863 CeedInt offset = 0; 1864 if (is_composite) { 1865 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1866 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1867 for (CeedInt k = 0; k < num_suboperators; ++k) { 1868 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1869 CeedChk(ierr); 1870 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1871 &single_entries); 1872 CeedChk(ierr); 1873 offset += single_entries; 1874 } 1875 } else { 1876 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1877 } 1878 1879 return CEED_ERROR_SUCCESS; 1880 } 1881 1882 /** 1883 @brief Create a multigrid coarse operator and level transfer operators 1884 for a CeedOperator, creating the prolongation basis from the 1885 fine and coarse grid interpolation 1886 1887 Note: Calling this function asserts that setup is complete 1888 and sets the CeedOperator as immutable. 1889 1890 @param[in] op_fine Fine grid operator 1891 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1892 @param[in] rstr_coarse Coarse grid restriction 1893 @param[in] basis_coarse Coarse grid active vector basis 1894 @param[out] op_coarse Coarse grid operator 1895 @param[out] op_prolong Coarse to fine operator 1896 @param[out] op_restrict Fine to coarse operator 1897 1898 @return An error code: 0 - success, otherwise - failure 1899 1900 @ref User 1901 **/ 1902 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1903 CeedVector p_mult_fine, 1904 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1905 CeedOperator *op_coarse, CeedOperator *op_prolong, 1906 CeedOperator *op_restrict) { 1907 int ierr; 1908 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1909 Ceed ceed; 1910 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1911 1912 // Check for compatible quadrature spaces 1913 CeedBasis basis_fine; 1914 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1915 CeedInt Q_f, Q_c; 1916 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1917 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1918 if (Q_f != Q_c) 1919 // LCOV_EXCL_START 1920 return CeedError(ceed, CEED_ERROR_DIMENSION, 1921 "Bases must have compatible quadrature spaces"); 1922 // LCOV_EXCL_STOP 1923 1924 // Coarse to fine basis 1925 CeedInt P_f, P_c, Q = Q_f; 1926 bool is_tensor_f, is_tensor_c; 1927 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1928 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1929 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1930 if (is_tensor_f && is_tensor_c) { 1931 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1932 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1933 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1934 } else if (!is_tensor_f && !is_tensor_c) { 1935 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1936 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1937 } else { 1938 // LCOV_EXCL_START 1939 return CeedError(ceed, CEED_ERROR_MINOR, 1940 "Bases must both be tensor or non-tensor"); 1941 // LCOV_EXCL_STOP 1942 } 1943 1944 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1945 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1946 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1947 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1948 const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL; 1949 if (is_tensor_f) { 1950 ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr); 1951 ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr); 1952 } else { 1953 ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr); 1954 ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr); 1955 } 1956 memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]); 1957 memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]); 1958 1959 // -- QR Factorization, interp_f = Q R 1960 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1961 1962 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1963 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1964 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1965 1966 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1967 for (CeedInt j=0; j<P_c; j++) { // Column j 1968 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1969 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1970 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1971 for (CeedInt k=i+1; k<P_f; k++) 1972 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1973 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1974 } 1975 } 1976 ierr = CeedFree(&tau); CeedChk(ierr); 1977 ierr = CeedFree(&interp_c); CeedChk(ierr); 1978 ierr = CeedFree(&interp_f); CeedChk(ierr); 1979 1980 // Complete with interp_c_to_f versions of code 1981 if (is_tensor_f) { 1982 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1983 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1984 CeedChk(ierr); 1985 } else { 1986 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1987 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1988 CeedChk(ierr); 1989 } 1990 1991 // Cleanup 1992 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1993 return CEED_ERROR_SUCCESS; 1994 } 1995 1996 /** 1997 @brief Create a multigrid coarse operator and level transfer operators 1998 for a CeedOperator with a tensor basis for the active basis 1999 2000 Note: Calling this function asserts that setup is complete 2001 and sets the CeedOperator as immutable. 2002 2003 @param[in] op_fine Fine grid operator 2004 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2005 @param[in] rstr_coarse Coarse grid restriction 2006 @param[in] basis_coarse Coarse grid active vector basis 2007 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2008 @param[out] op_coarse Coarse grid operator 2009 @param[out] op_prolong Coarse to fine operator 2010 @param[out] op_restrict Fine to coarse operator 2011 2012 @return An error code: 0 - success, otherwise - failure 2013 2014 @ref User 2015 **/ 2016 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 2017 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2018 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 2019 CeedOperator *op_prolong, CeedOperator *op_restrict) { 2020 int ierr; 2021 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2022 Ceed ceed; 2023 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2024 2025 // Check for compatible quadrature spaces 2026 CeedBasis basis_fine; 2027 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2028 CeedInt Q_f, Q_c; 2029 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2030 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2031 if (Q_f != Q_c) 2032 // LCOV_EXCL_START 2033 return CeedError(ceed, CEED_ERROR_DIMENSION, 2034 "Bases must have compatible quadrature spaces"); 2035 // LCOV_EXCL_STOP 2036 2037 // Coarse to fine basis 2038 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2039 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2040 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2041 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 2042 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2043 CeedChk(ierr); 2044 P_1d_c = dim == 1 ? num_nodes_c : 2045 dim == 2 ? sqrt(num_nodes_c) : 2046 cbrt(num_nodes_c); 2047 CeedScalar *q_ref, *q_weight, *grad; 2048 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 2049 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 2050 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 2051 CeedBasis basis_c_to_f; 2052 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 2053 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2054 CeedChk(ierr); 2055 ierr = CeedFree(&q_ref); CeedChk(ierr); 2056 ierr = CeedFree(&q_weight); CeedChk(ierr); 2057 ierr = CeedFree(&grad); CeedChk(ierr); 2058 2059 // Core code 2060 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2061 basis_coarse, basis_c_to_f, op_coarse, 2062 op_prolong, op_restrict); 2063 CeedChk(ierr); 2064 return CEED_ERROR_SUCCESS; 2065 } 2066 2067 /** 2068 @brief Create a multigrid coarse operator and level transfer operators 2069 for a CeedOperator with a non-tensor basis for the active vector 2070 2071 Note: Calling this function asserts that setup is complete 2072 and sets the CeedOperator as immutable. 2073 2074 @param[in] op_fine Fine grid operator 2075 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2076 @param[in] rstr_coarse Coarse grid restriction 2077 @param[in] basis_coarse Coarse grid active vector basis 2078 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2079 @param[out] op_coarse Coarse grid operator 2080 @param[out] op_prolong Coarse to fine operator 2081 @param[out] op_restrict Fine to coarse operator 2082 2083 @return An error code: 0 - success, otherwise - failure 2084 2085 @ref User 2086 **/ 2087 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 2088 CeedVector p_mult_fine, 2089 CeedElemRestriction rstr_coarse, 2090 CeedBasis basis_coarse, 2091 const CeedScalar *interp_c_to_f, 2092 CeedOperator *op_coarse, 2093 CeedOperator *op_prolong, 2094 CeedOperator *op_restrict) { 2095 int ierr; 2096 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2097 Ceed ceed; 2098 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2099 2100 // Check for compatible quadrature spaces 2101 CeedBasis basis_fine; 2102 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2103 CeedInt Q_f, Q_c; 2104 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2105 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2106 if (Q_f != Q_c) 2107 // LCOV_EXCL_START 2108 return CeedError(ceed, CEED_ERROR_DIMENSION, 2109 "Bases must have compatible quadrature spaces"); 2110 // LCOV_EXCL_STOP 2111 2112 // Coarse to fine basis 2113 CeedElemTopology topo; 2114 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 2115 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2116 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2117 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2118 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 2119 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2120 CeedChk(ierr); 2121 CeedScalar *q_ref, *q_weight, *grad; 2122 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 2123 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 2124 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 2125 CeedBasis basis_c_to_f; 2126 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 2127 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2128 CeedChk(ierr); 2129 ierr = CeedFree(&q_ref); CeedChk(ierr); 2130 ierr = CeedFree(&q_weight); CeedChk(ierr); 2131 ierr = CeedFree(&grad); CeedChk(ierr); 2132 2133 // Core code 2134 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2135 basis_coarse, basis_c_to_f, op_coarse, 2136 op_prolong, op_restrict); 2137 CeedChk(ierr); 2138 return CEED_ERROR_SUCCESS; 2139 } 2140 2141 /** 2142 @brief Build a FDM based approximate inverse for each element for a 2143 CeedOperator 2144 2145 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2146 Method based approximate inverse. This function obtains the simultaneous 2147 diagonalization for the 1D mass and Laplacian operators, 2148 M = V^T V, K = V^T S V. 2149 The assembled QFunction is used to modify the eigenvalues from simultaneous 2150 diagonalization and obtain an approximate inverse of the form 2151 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2152 associated CeedQFunction must therefore also be linear. 2153 2154 Note: Calling this function asserts that setup is complete 2155 and sets the CeedOperator as immutable. 2156 2157 @param op CeedOperator to create element inverses 2158 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2159 for each element 2160 @param request Address of CeedRequest for non-blocking completion, else 2161 @ref CEED_REQUEST_IMMEDIATE 2162 2163 @return An error code: 0 - success, otherwise - failure 2164 2165 @ref User 2166 **/ 2167 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2168 CeedRequest *request) { 2169 int ierr; 2170 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2171 2172 // Use backend version, if available 2173 if (op->CreateFDMElementInverse) { 2174 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2175 return CEED_ERROR_SUCCESS; 2176 } else { 2177 // Check for valid fallback resource 2178 const char *resource, *fallback_resource; 2179 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2180 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2181 CeedChk(ierr); 2182 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2183 // Fallback to reference Ceed 2184 if (!op->op_fallback) { 2185 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2186 } 2187 // Assemble 2188 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2189 CeedChk(ierr); 2190 return CEED_ERROR_SUCCESS; 2191 } 2192 } 2193 2194 // Interface implementation 2195 Ceed ceed, ceed_parent; 2196 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2197 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2198 ceed_parent = ceed_parent ? ceed_parent : ceed; 2199 CeedQFunction qf; 2200 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2201 2202 // Determine active input basis 2203 bool interp = false, grad = false; 2204 CeedBasis basis = NULL; 2205 CeedElemRestriction rstr = NULL; 2206 CeedOperatorField *op_fields; 2207 CeedQFunctionField *qf_fields; 2208 CeedInt num_input_fields; 2209 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2210 CeedChk(ierr); 2211 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2212 for (CeedInt i=0; i<num_input_fields; i++) { 2213 CeedVector vec; 2214 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2215 if (vec == CEED_VECTOR_ACTIVE) { 2216 CeedEvalMode eval_mode; 2217 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2218 interp = interp || eval_mode == CEED_EVAL_INTERP; 2219 grad = grad || eval_mode == CEED_EVAL_GRAD; 2220 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2221 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2222 } 2223 } 2224 if (!basis) 2225 // LCOV_EXCL_START 2226 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2227 // LCOV_EXCL_STOP 2228 CeedSize l_size = 1; 2229 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2230 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2231 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2232 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2233 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2234 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2235 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2236 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2237 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2238 2239 // Build and diagonalize 1D Mass and Laplacian 2240 bool tensor_basis; 2241 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2242 if (!tensor_basis) 2243 // LCOV_EXCL_START 2244 return CeedError(ceed, CEED_ERROR_BACKEND, 2245 "FDMElementInverse only supported for tensor " 2246 "bases"); 2247 // LCOV_EXCL_STOP 2248 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2249 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2250 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2251 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2252 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2253 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2254 // -- Build matrices 2255 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2256 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2257 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2258 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2259 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2260 mass, laplace); CeedChk(ierr); 2261 2262 // -- Diagonalize 2263 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2264 CeedChk(ierr); 2265 ierr = CeedFree(&mass); CeedChk(ierr); 2266 ierr = CeedFree(&laplace); CeedChk(ierr); 2267 for (CeedInt i=0; i<P_1d; i++) 2268 for (CeedInt j=0; j<P_1d; j++) 2269 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2270 ierr = CeedFree(&x); CeedChk(ierr); 2271 2272 // Assemble QFunction 2273 CeedVector assembled; 2274 CeedElemRestriction rstr_qf; 2275 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2276 &rstr_qf, request); CeedChk(ierr); 2277 CeedInt layout[3]; 2278 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2279 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2280 CeedScalar max_norm = 0; 2281 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2282 2283 // Calculate element averages 2284 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2285 CeedScalar *elem_avg; 2286 const CeedScalar *assembled_array, *q_weight_array; 2287 CeedVector q_weight; 2288 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2289 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2290 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2291 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2292 CeedChk(ierr); 2293 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2294 CeedChk(ierr); 2295 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2296 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2297 for (CeedInt e=0; e<num_elem; e++) { 2298 CeedInt count = 0; 2299 for (CeedInt q=0; q<num_qpts; q++) 2300 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2301 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2302 qf_value_bound) { 2303 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2304 q_weight_array[q]; 2305 count++; 2306 } 2307 if (count) { 2308 elem_avg[e] /= count; 2309 } else { 2310 elem_avg[e] = 1.0; 2311 } 2312 } 2313 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2314 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2315 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2316 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2317 2318 // Build FDM diagonal 2319 CeedVector q_data; 2320 CeedScalar *q_data_array, *fdm_diagonal; 2321 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2322 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2323 for (CeedInt c=0; c<num_comp; c++) 2324 for (CeedInt n=0; n<elem_size; n++) { 2325 if (interp) 2326 fdm_diagonal[c*elem_size + n] = 1.0; 2327 if (grad) 2328 for (CeedInt d=0; d<dim; d++) { 2329 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2330 fdm_diagonal[c*elem_size + n] += lambda[i]; 2331 } 2332 if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound) 2333 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2334 } 2335 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2336 CeedChk(ierr); 2337 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2338 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2339 CeedChk(ierr); 2340 for (CeedInt e=0; e<num_elem; e++) 2341 for (CeedInt c=0; c<num_comp; c++) 2342 for (CeedInt n=0; n<elem_size; n++) 2343 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2344 fdm_diagonal[c*elem_size + n]); 2345 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2346 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2347 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2348 2349 // Setup FDM operator 2350 // -- Basis 2351 CeedBasis fdm_basis; 2352 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2353 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2354 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2355 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2356 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2357 fdm_interp, grad_dummy, q_ref_dummy, 2358 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2359 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2360 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2361 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2362 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2363 ierr = CeedFree(&lambda); CeedChk(ierr); 2364 2365 // -- Restriction 2366 CeedElemRestriction rstr_qd_i; 2367 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2368 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2369 num_comp, num_elem*num_comp*elem_size, 2370 strides, &rstr_qd_i); CeedChk(ierr); 2371 // -- QFunction 2372 CeedQFunction qf_fdm; 2373 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2374 CeedChk(ierr); 2375 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2376 CeedChk(ierr); 2377 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2378 CeedChk(ierr); 2379 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2380 CeedChk(ierr); 2381 ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr); 2382 // -- QFunction context 2383 CeedInt *num_comp_data; 2384 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2385 num_comp_data[0] = num_comp; 2386 CeedQFunctionContext ctx_fdm; 2387 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2388 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2389 sizeof(*num_comp_data), num_comp_data); 2390 CeedChk(ierr); 2391 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2392 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2393 // -- Operator 2394 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2395 CeedChk(ierr); 2396 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2397 CeedChk(ierr); 2398 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2399 q_data); CeedChk(ierr); 2400 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2401 CeedChk(ierr); 2402 2403 // Cleanup 2404 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2405 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2406 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2407 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2408 2409 return CEED_ERROR_SUCCESS; 2410 } 2411 2412 /// @} 2413