1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedOperator preconditioning interfaces 18 19 /// ---------------------------------------------------------------------------- 20 /// CeedOperator Library Internal Preconditioning Functions 21 /// ---------------------------------------------------------------------------- 22 /// @addtogroup CeedOperatorDeveloper 23 /// @{ 24 25 /** 26 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced 27 CeedOperator functionality 28 29 @param[in] qf CeedQFunction to create fallback for 30 @param[out] fallback_qf fallback CeedQFunction 31 32 @return An error code: 0 - success, otherwise - failure 33 34 @ref Developer 35 **/ 36 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, 37 CeedQFunction *qf_fallback) { 38 int ierr; 39 40 // Check if NULL qf passed in 41 if (!qf) return CEED_ERROR_SUCCESS; 42 43 char *source_path_with_name = ""; 44 if (qf->source_path) { 45 size_t path_len = strlen(qf->source_path), 46 name_len = strlen(qf->kernel_name); 47 ierr = CeedCalloc(path_len + name_len + 2, &source_path_with_name); 48 CeedChk(ierr); 49 memcpy(source_path_with_name, qf->source_path, path_len); 50 memcpy(&source_path_with_name[path_len], ":", 1); 51 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 52 } else { 53 ierr = CeedCalloc(1, &source_path_with_name); CeedChk(ierr); 54 } 55 56 ierr = CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, 57 qf->function, source_path_with_name, 58 qf_fallback); CeedChk(ierr); 59 { 60 CeedQFunctionContext ctx; 61 62 ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr); 63 ierr = CeedQFunctionSetContext(*qf_fallback, ctx); CeedChk(ierr); 64 } 65 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 66 ierr = CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, 67 qf->input_fields[i]->size, 68 qf->input_fields[i]->eval_mode); CeedChk(ierr); 69 } 70 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 71 ierr = CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, 72 qf->output_fields[i]->size, 73 qf->output_fields[i]->eval_mode); CeedChk(ierr); 74 } 75 ierr = CeedFree(&source_path_with_name); CeedChk(ierr); 76 77 return CEED_ERROR_SUCCESS; 78 } 79 80 /** 81 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 82 CeedOperator functionality 83 84 @param op CeedOperator to create fallback for 85 86 @return An error code: 0 - success, otherwise - failure 87 88 @ref Developer 89 **/ 90 int CeedOperatorCreateFallback(CeedOperator op) { 91 int ierr; 92 Ceed ceed_fallback; 93 94 // Check not already created 95 if (op->op_fallback) return CEED_ERROR_SUCCESS; 96 97 // Fallback Ceed 98 ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr); 99 100 // Clone Op 101 CeedOperator op_fallback; 102 if (op->is_composite) { 103 ierr = CeedCompositeOperatorCreate(ceed_fallback, &op_fallback); 104 CeedChk(ierr); 105 for (CeedInt i = 0; i < op->num_suboperators; i++) { 106 ierr = CeedCompositeOperatorAddSub(op_fallback, op->sub_operators[i]); 107 CeedChk(ierr); 108 } 109 } else { 110 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 111 ierr = CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback); 112 CeedChk(ierr); 113 ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback); 114 CeedChk(ierr); 115 ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback); 116 CeedChk(ierr); 117 ierr = CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, 118 dqfT_fallback, &op_fallback); CeedChk(ierr); 119 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 120 ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, 121 op->input_fields[i]->elem_restr, 122 op->input_fields[i]->basis, 123 op->input_fields[i]->vec); CeedChk(ierr); 124 } 125 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 126 ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, 127 op->output_fields[i]->elem_restr, 128 op->output_fields[i]->basis, 129 op->output_fields[i]->vec); CeedChk(ierr); 130 } 131 ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 132 &op_fallback->qf_assembled); CeedChk(ierr); 133 if (op_fallback->num_qpts == 0) { 134 ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts); 135 CeedChk(ierr); 136 } 137 // Cleanup 138 ierr = CeedQFunctionDestroy(&qf_fallback); CeedChk(ierr); 139 ierr = CeedQFunctionDestroy(&dqf_fallback); CeedChk(ierr); 140 ierr = CeedQFunctionDestroy(&dqfT_fallback); CeedChk(ierr); 141 } 142 ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr); 143 ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr); 144 op->op_fallback = op_fallback; 145 146 return CEED_ERROR_SUCCESS; 147 } 148 149 /** 150 @brief Select correct basis matrix pointer based on CeedEvalMode 151 152 @param[in] eval_mode Current basis evaluation mode 153 @param[in] identity Pointer to identity matrix 154 @param[in] interp Pointer to interpolation matrix 155 @param[in] grad Pointer to gradient matrix 156 @param[out] basis_ptr Basis pointer to set 157 158 @ref Developer 159 **/ 160 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 161 const CeedScalar *identity, const CeedScalar *interp, 162 const CeedScalar *grad, const CeedScalar **basis_ptr) { 163 switch (eval_mode) { 164 case CEED_EVAL_NONE: 165 *basis_ptr = identity; 166 break; 167 case CEED_EVAL_INTERP: 168 *basis_ptr = interp; 169 break; 170 case CEED_EVAL_GRAD: 171 *basis_ptr = grad; 172 break; 173 case CEED_EVAL_WEIGHT: 174 case CEED_EVAL_DIV: 175 case CEED_EVAL_CURL: 176 break; // Caught by QF Assembly 177 } 178 } 179 180 /** 181 @brief Create point block restriction for active operator field 182 183 @param[in] rstr Original CeedElemRestriction for active field 184 @param[out] pointblock_rstr Address of the variable where the newly created 185 CeedElemRestriction will be stored 186 187 @return An error code: 0 - success, otherwise - failure 188 189 @ref Developer 190 **/ 191 static int CeedOperatorCreateActivePointBlockRestriction( 192 CeedElemRestriction rstr, 193 CeedElemRestriction *pointblock_rstr) { 194 int ierr; 195 Ceed ceed; 196 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 197 const CeedInt *offsets; 198 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 199 CeedChk(ierr); 200 201 // Expand offsets 202 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 203 *pointblock_offsets; 204 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 205 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 206 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 207 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 208 CeedInt shift = num_comp; 209 if (comp_stride != 1) 210 shift *= num_comp; 211 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 212 CeedChk(ierr); 213 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 214 pointblock_offsets[i] = offsets[i]*shift; 215 if (pointblock_offsets[i] > max) 216 max = pointblock_offsets[i]; 217 } 218 219 // Create new restriction 220 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 221 1, max + num_comp*num_comp, CEED_MEM_HOST, 222 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 223 CeedChk(ierr); 224 225 // Cleanup 226 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 227 228 return CEED_ERROR_SUCCESS; 229 } 230 231 /** 232 @brief Core logic for assembling operator diagonal or point block diagonal 233 234 @param[in] op CeedOperator to assemble point block diagonal 235 @param[in] request Address of CeedRequest for non-blocking completion, else 236 CEED_REQUEST_IMMEDIATE 237 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 238 @param[out] assembled CeedVector to store assembled diagonal 239 240 @return An error code: 0 - success, otherwise - failure 241 242 @ref Developer 243 **/ 244 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 245 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 246 int ierr; 247 Ceed ceed; 248 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 249 250 // Assemble QFunction 251 CeedQFunction qf; 252 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 253 CeedInt num_input_fields, num_output_fields; 254 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 255 CeedChk(ierr); 256 CeedVector assembled_qf; 257 CeedElemRestriction rstr; 258 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 259 &rstr, request); CeedChk(ierr); 260 CeedInt layout[3]; 261 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 262 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 263 CeedScalar max_norm = 0; 264 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 265 266 // Determine active input basis 267 CeedOperatorField *op_fields; 268 CeedQFunctionField *qf_fields; 269 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 270 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 271 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 272 CeedEvalMode *eval_mode_in = NULL; 273 CeedBasis basis_in = NULL; 274 CeedElemRestriction rstr_in = NULL; 275 for (CeedInt i=0; i<num_input_fields; i++) { 276 CeedVector vec; 277 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 278 if (vec == CEED_VECTOR_ACTIVE) { 279 CeedElemRestriction rstr; 280 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 281 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 282 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 283 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 284 if (rstr_in && rstr_in != rstr) 285 // LCOV_EXCL_START 286 return CeedError(ceed, CEED_ERROR_BACKEND, 287 "Multi-field non-composite operator diagonal assembly not supported"); 288 // LCOV_EXCL_STOP 289 rstr_in = rstr; 290 CeedEvalMode eval_mode; 291 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 292 switch (eval_mode) { 293 case CEED_EVAL_NONE: 294 case CEED_EVAL_INTERP: 295 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 296 eval_mode_in[num_eval_mode_in] = eval_mode; 297 num_eval_mode_in += 1; 298 break; 299 case CEED_EVAL_GRAD: 300 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 301 for (CeedInt d=0; d<dim; d++) 302 eval_mode_in[num_eval_mode_in+d] = eval_mode; 303 num_eval_mode_in += dim; 304 break; 305 case CEED_EVAL_WEIGHT: 306 case CEED_EVAL_DIV: 307 case CEED_EVAL_CURL: 308 break; // Caught by QF Assembly 309 } 310 } 311 } 312 313 // Determine active output basis 314 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 315 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 316 CeedInt num_eval_mode_out = 0; 317 CeedEvalMode *eval_mode_out = NULL; 318 CeedBasis basis_out = NULL; 319 CeedElemRestriction rstr_out = NULL; 320 for (CeedInt i=0; i<num_output_fields; i++) { 321 CeedVector vec; 322 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 323 if (vec == CEED_VECTOR_ACTIVE) { 324 CeedElemRestriction rstr; 325 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 326 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 327 if (rstr_out && rstr_out != rstr) 328 // LCOV_EXCL_START 329 return CeedError(ceed, CEED_ERROR_BACKEND, 330 "Multi-field non-composite operator diagonal assembly not supported"); 331 // LCOV_EXCL_STOP 332 rstr_out = rstr; 333 CeedEvalMode eval_mode; 334 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 335 switch (eval_mode) { 336 case CEED_EVAL_NONE: 337 case CEED_EVAL_INTERP: 338 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 339 eval_mode_out[num_eval_mode_out] = eval_mode; 340 num_eval_mode_out += 1; 341 break; 342 case CEED_EVAL_GRAD: 343 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 344 for (CeedInt d=0; d<dim; d++) 345 eval_mode_out[num_eval_mode_out+d] = eval_mode; 346 num_eval_mode_out += dim; 347 break; 348 case CEED_EVAL_WEIGHT: 349 case CEED_EVAL_DIV: 350 case CEED_EVAL_CURL: 351 break; // Caught by QF Assembly 352 } 353 } 354 } 355 356 // Assemble point block diagonal restriction, if needed 357 CeedElemRestriction diag_rstr = rstr_out; 358 if (is_pointblock) { 359 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 360 CeedChk(ierr); 361 } 362 363 // Create diagonal vector 364 CeedVector elem_diag; 365 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 366 CeedChk(ierr); 367 368 // Assemble element operator diagonals 369 CeedScalar *elem_diag_array; 370 const CeedScalar *assembled_qf_array; 371 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 372 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 373 CeedChk(ierr); 374 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 375 CeedChk(ierr); 376 CeedInt num_elem, num_nodes, num_qpts; 377 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 378 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 379 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 380 // Basis matrices 381 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 382 CeedScalar *identity = NULL; 383 bool evalNone = false; 384 for (CeedInt i=0; i<num_eval_mode_in; i++) 385 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 386 for (CeedInt i=0; i<num_eval_mode_out; i++) 387 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 388 if (evalNone) { 389 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 390 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 391 identity[i*num_nodes+i] = 1.0; 392 } 393 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 394 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 395 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 396 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 397 // Compute the diagonal of B^T D B 398 // Each element 399 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 400 for (CeedInt e=0; e<num_elem; e++) { 401 CeedInt d_out = -1; 402 // Each basis eval mode pair 403 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 404 const CeedScalar *bt = NULL; 405 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 406 d_out += 1; 407 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 408 &grad_out[d_out*num_qpts*num_nodes], &bt); 409 CeedInt d_in = -1; 410 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 411 const CeedScalar *b = NULL; 412 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 413 d_in += 1; 414 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 415 &grad_in[d_in*num_qpts*num_nodes], &b); 416 // Each component 417 for (CeedInt c_out=0; c_out<num_comp; c_out++) 418 // Each qpoint/node pair 419 for (CeedInt q=0; q<num_qpts; q++) 420 if (is_pointblock) { 421 // Point Block Diagonal 422 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 423 const CeedScalar qf_value = 424 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 425 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 426 if (fabs(qf_value) > qf_value_bound) 427 for (CeedInt n=0; n<num_nodes; n++) 428 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 429 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 430 } 431 } else { 432 // Diagonal Only 433 const CeedScalar qf_value = 434 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 435 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 436 if (fabs(qf_value) > qf_value_bound) 437 for (CeedInt n=0; n<num_nodes; n++) 438 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 439 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 440 } 441 } 442 } 443 } 444 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 445 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 446 CeedChk(ierr); 447 448 // Assemble local operator diagonal 449 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 450 assembled, request); CeedChk(ierr); 451 452 // Cleanup 453 if (is_pointblock) { 454 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 455 } 456 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 457 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 458 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 459 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 460 ierr = CeedFree(&identity); CeedChk(ierr); 461 462 return CEED_ERROR_SUCCESS; 463 } 464 465 /** 466 @brief Core logic for assembling composite operator diagonal 467 468 @param[in] op CeedOperator to assemble point block diagonal 469 @param[in] request Address of CeedRequest for non-blocking completion, else 470 CEED_REQUEST_IMMEDIATE 471 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 472 @param[out] assembled CeedVector to store assembled diagonal 473 474 @return An error code: 0 - success, otherwise - failure 475 476 @ref Developer 477 **/ 478 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 479 CeedOperator op, CeedRequest *request, const bool is_pointblock, 480 CeedVector assembled) { 481 int ierr; 482 CeedInt num_sub; 483 CeedOperator *suboperators; 484 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 485 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 486 for (CeedInt i = 0; i < num_sub; i++) { 487 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 488 is_pointblock, assembled); CeedChk(ierr); 489 } 490 return CEED_ERROR_SUCCESS; 491 } 492 493 /** 494 @brief Build nonzero pattern for non-composite operator 495 496 Users should generally use CeedOperatorLinearAssembleSymbolic() 497 498 @param[in] op CeedOperator to assemble nonzero pattern 499 @param[in] offset Offset for number of entries 500 @param[out] rows Row number for each entry 501 @param[out] cols Column number for each entry 502 503 @return An error code: 0 - success, otherwise - failure 504 505 @ref Developer 506 **/ 507 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 508 CeedInt *rows, CeedInt *cols) { 509 int ierr; 510 Ceed ceed = op->ceed; 511 if (op->is_composite) 512 // LCOV_EXCL_START 513 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 514 "Composite operator not supported"); 515 // LCOV_EXCL_STOP 516 517 CeedSize num_nodes; 518 ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr); 519 CeedElemRestriction rstr_in; 520 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 521 CeedInt num_elem, elem_size, num_comp; 522 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 523 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 524 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 525 CeedInt layout_er[3]; 526 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 527 528 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 529 530 // Determine elem_dof relation 531 CeedVector index_vec; 532 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 533 CeedScalar *array; 534 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 535 for (CeedInt i = 0; i < num_nodes; ++i) { 536 array[i] = i; 537 } 538 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 539 CeedVector elem_dof; 540 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 541 CeedChk(ierr); 542 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 543 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 544 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 545 const CeedScalar *elem_dof_a; 546 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 547 CeedChk(ierr); 548 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 549 550 // Determine i, j locations for element matrices 551 CeedInt count = 0; 552 for (CeedInt e = 0; e < num_elem; ++e) { 553 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 554 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 555 for (CeedInt i = 0; i < elem_size; ++i) { 556 for (CeedInt j = 0; j < elem_size; ++j) { 557 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 558 (comp_out)*layout_er[1] + e*layout_er[2]; 559 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 560 (comp_in)*layout_er[1] + e*layout_er[2]; 561 562 const CeedInt row = elem_dof_a[elem_dof_index_row]; 563 const CeedInt col = elem_dof_a[elem_dof_index_col]; 564 565 rows[offset + count] = row; 566 cols[offset + count] = col; 567 count++; 568 } 569 } 570 } 571 } 572 } 573 if (count != local_num_entries) 574 // LCOV_EXCL_START 575 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 576 // LCOV_EXCL_STOP 577 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 578 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 579 580 return CEED_ERROR_SUCCESS; 581 } 582 583 /** 584 @brief Assemble nonzero entries for non-composite operator 585 586 Users should generally use CeedOperatorLinearAssemble() 587 588 @param[in] op CeedOperator to assemble 589 @param[out] offset Offest for number of entries 590 @param[out] values Values to assemble into matrix 591 592 @return An error code: 0 - success, otherwise - failure 593 594 @ref Developer 595 **/ 596 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 597 CeedVector values) { 598 int ierr; 599 Ceed ceed = op->ceed; 600 if (op->is_composite) 601 // LCOV_EXCL_START 602 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 603 "Composite operator not supported"); 604 // LCOV_EXCL_STOP 605 606 // Assemble QFunction 607 CeedQFunction qf; 608 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 609 CeedVector assembled_qf; 610 CeedElemRestriction rstr_q; 611 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 612 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 613 614 CeedSize qf_length; 615 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 616 617 CeedInt num_input_fields, num_output_fields; 618 CeedOperatorField *input_fields; 619 CeedOperatorField *output_fields; 620 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 621 &num_output_fields, &output_fields); CeedChk(ierr); 622 623 // Determine active input basis 624 CeedQFunctionField *qf_fields; 625 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 626 CeedInt num_eval_mode_in = 0, dim = 1; 627 CeedEvalMode *eval_mode_in = NULL; 628 CeedBasis basis_in = NULL; 629 CeedElemRestriction rstr_in = NULL; 630 for (CeedInt i=0; i<num_input_fields; i++) { 631 CeedVector vec; 632 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 633 if (vec == CEED_VECTOR_ACTIVE) { 634 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 635 CeedChk(ierr); 636 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 637 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 638 CeedChk(ierr); 639 CeedEvalMode eval_mode; 640 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 641 CeedChk(ierr); 642 switch (eval_mode) { 643 case CEED_EVAL_NONE: 644 case CEED_EVAL_INTERP: 645 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 646 eval_mode_in[num_eval_mode_in] = eval_mode; 647 num_eval_mode_in += 1; 648 break; 649 case CEED_EVAL_GRAD: 650 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 651 for (CeedInt d=0; d<dim; d++) { 652 eval_mode_in[num_eval_mode_in+d] = eval_mode; 653 } 654 num_eval_mode_in += dim; 655 break; 656 case CEED_EVAL_WEIGHT: 657 case CEED_EVAL_DIV: 658 case CEED_EVAL_CURL: 659 break; // Caught by QF Assembly 660 } 661 } 662 } 663 664 // Determine active output basis 665 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 666 CeedInt num_eval_mode_out = 0; 667 CeedEvalMode *eval_mode_out = NULL; 668 CeedBasis basis_out = NULL; 669 CeedElemRestriction rstr_out = NULL; 670 for (CeedInt i=0; i<num_output_fields; i++) { 671 CeedVector vec; 672 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 673 if (vec == CEED_VECTOR_ACTIVE) { 674 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 675 CeedChk(ierr); 676 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 677 CeedChk(ierr); 678 CeedEvalMode eval_mode; 679 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 680 CeedChk(ierr); 681 switch (eval_mode) { 682 case CEED_EVAL_NONE: 683 case CEED_EVAL_INTERP: 684 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 685 eval_mode_out[num_eval_mode_out] = eval_mode; 686 num_eval_mode_out += 1; 687 break; 688 case CEED_EVAL_GRAD: 689 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 690 for (CeedInt d=0; d<dim; d++) { 691 eval_mode_out[num_eval_mode_out+d] = eval_mode; 692 } 693 num_eval_mode_out += dim; 694 break; 695 case CEED_EVAL_WEIGHT: 696 case CEED_EVAL_DIV: 697 case CEED_EVAL_CURL: 698 break; // Caught by QF Assembly 699 } 700 } 701 } 702 703 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 704 // LCOV_EXCL_START 705 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 706 "Cannot assemble operator with out inputs/outputs"); 707 // LCOV_EXCL_STOP 708 709 CeedInt num_elem, elem_size, num_qpts, num_comp; 710 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 711 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 712 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 713 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 714 715 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 716 717 // loop over elements and put in data structure 718 const CeedScalar *interp_in, *grad_in; 719 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 720 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 721 722 const CeedScalar *assembled_qf_array; 723 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 724 CeedChk(ierr); 725 726 CeedInt layout_qf[3]; 727 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 728 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 729 730 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 731 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 732 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 733 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 734 num_qpts]; // logically 3-tensor 735 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 736 CeedScalar elem_mat[elem_size * elem_size]; 737 CeedInt count = 0; 738 CeedScalar *vals; 739 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 740 for (CeedInt e = 0; e < num_elem; ++e) { 741 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 742 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 743 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 744 B_mat_in[ell] = 0.0; 745 } 746 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 747 B_mat_out[ell] = 0.0; 748 } 749 // Store block-diagonal D matrix as collection of small dense blocks 750 for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; 751 ++ell) { 752 D_mat[ell] = 0.0; 753 } 754 // form element matrix itself (for each block component) 755 for (CeedInt ell = 0; ell < elem_size*elem_size; ++ell) { 756 elem_mat[ell] = 0.0; 757 } 758 for (CeedInt q = 0; q < num_qpts; ++q) { 759 for (CeedInt n = 0; n < elem_size; ++n) { 760 CeedInt d_in = -1; 761 for (CeedInt e_in = 0; e_in < num_eval_mode_in; ++e_in) { 762 const CeedInt qq = num_eval_mode_in*q; 763 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 764 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 765 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 766 d_in += 1; 767 B_mat_in[(qq+e_in)*elem_size + n] += 768 grad_in[(d_in*num_qpts+q) * elem_size + n]; 769 } else { 770 // LCOV_EXCL_START 771 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 772 // LCOV_EXCL_STOP 773 } 774 } 775 CeedInt d_out = -1; 776 for (CeedInt e_out = 0; e_out < num_eval_mode_out; ++e_out) { 777 const CeedInt qq = num_eval_mode_out*q; 778 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 779 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 780 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 781 d_out += 1; 782 B_mat_out[(qq+e_out)*elem_size + n] += 783 grad_in[(d_out*num_qpts+q) * elem_size + n]; 784 } else { 785 // LCOV_EXCL_START 786 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 787 // LCOV_EXCL_STOP 788 } 789 } 790 } 791 for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) { 792 for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) { 793 const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej) 794 *num_comp 795 +comp_out; 796 const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 797 e*layout_qf[2]; 798 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 799 } 800 } 801 } 802 // Compute B^T*D 803 for (CeedInt ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 804 BTD[ell] = 0.0; 805 } 806 for (CeedInt j = 0; j<elem_size; ++j) { 807 for (CeedInt q = 0; q<num_qpts; ++q) { 808 const CeedInt qq = num_eval_mode_out*q; 809 for (CeedInt ei = 0; ei < num_eval_mode_in; ++ei) { 810 for (CeedInt ej = 0; ej < num_eval_mode_out; ++ej) { 811 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 812 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 813 } 814 } 815 } 816 } 817 818 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 819 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 820 821 // put element matrix in coordinate data structure 822 for (CeedInt i = 0; i < elem_size; ++i) { 823 for (CeedInt j = 0; j < elem_size; ++j) { 824 vals[offset + count] = elem_mat[i*elem_size + j]; 825 count++; 826 } 827 } 828 } 829 } 830 } 831 if (count != local_num_entries) 832 // LCOV_EXCL_START 833 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 834 // LCOV_EXCL_STOP 835 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 836 837 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 838 CeedChk(ierr); 839 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 840 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 841 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 842 843 return CEED_ERROR_SUCCESS; 844 } 845 846 /** 847 @brief Count number of entries for assembled CeedOperator 848 849 @param[in] op CeedOperator to assemble 850 @param[out] num_entries Number of entries in assembled representation 851 852 @return An error code: 0 - success, otherwise - failure 853 854 @ref Utility 855 **/ 856 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 857 CeedInt *num_entries) { 858 int ierr; 859 CeedElemRestriction rstr; 860 CeedInt num_elem, elem_size, num_comp; 861 862 if (op->is_composite) 863 // LCOV_EXCL_START 864 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 865 "Composite operator not supported"); 866 // LCOV_EXCL_STOP 867 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 868 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 869 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 870 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 871 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 872 873 return CEED_ERROR_SUCCESS; 874 } 875 876 /** 877 @brief Common code for creating a multigrid coarse operator and level 878 transfer operators for a CeedOperator 879 880 @param[in] op_fine Fine grid operator 881 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 882 @param[in] rstr_coarse Coarse grid restriction 883 @param[in] basis_coarse Coarse grid active vector basis 884 @param[in] basis_c_to_f Basis for coarse to fine interpolation 885 @param[out] op_coarse Coarse grid operator 886 @param[out] op_prolong Coarse to fine operator 887 @param[out] op_restrict Fine to coarse operator 888 889 @return An error code: 0 - success, otherwise - failure 890 891 @ref Developer 892 **/ 893 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 894 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 895 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 896 CeedOperator *op_restrict) { 897 int ierr; 898 Ceed ceed; 899 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 900 901 // Check for composite operator 902 bool is_composite; 903 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 904 if (is_composite) 905 // LCOV_EXCL_START 906 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 907 "Automatic multigrid setup for composite operators not supported"); 908 // LCOV_EXCL_STOP 909 910 // Coarse Grid 911 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 912 op_coarse); CeedChk(ierr); 913 CeedElemRestriction rstr_fine = NULL; 914 // -- Clone input fields 915 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 916 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 917 rstr_fine = op_fine->input_fields[i]->elem_restr; 918 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 919 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 920 CeedChk(ierr); 921 } else { 922 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 923 op_fine->input_fields[i]->elem_restr, 924 op_fine->input_fields[i]->basis, 925 op_fine->input_fields[i]->vec); CeedChk(ierr); 926 } 927 } 928 // -- Clone output fields 929 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 930 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 931 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 932 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 933 CeedChk(ierr); 934 } else { 935 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 936 op_fine->output_fields[i]->elem_restr, 937 op_fine->output_fields[i]->basis, 938 op_fine->output_fields[i]->vec); CeedChk(ierr); 939 } 940 } 941 // -- Clone QFunctionAssemblyData 942 ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, 943 &(*op_coarse)->qf_assembled); CeedChk(ierr); 944 945 // Multiplicity vector 946 CeedVector mult_vec, mult_e_vec; 947 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 948 CeedChk(ierr); 949 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 950 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 951 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 952 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 953 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 954 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 955 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 956 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 957 958 // Restriction 959 CeedInt num_comp; 960 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr); 961 CeedQFunction qf_restrict; 962 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 963 CeedChk(ierr); 964 CeedInt *num_comp_r_data; 965 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 966 num_comp_r_data[0] = num_comp; 967 CeedQFunctionContext ctx_r; 968 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 969 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 970 sizeof(*num_comp_r_data), num_comp_r_data); 971 CeedChk(ierr); 972 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 973 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 974 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 975 CeedChk(ierr); 976 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 977 CeedChk(ierr); 978 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 979 CEED_EVAL_INTERP); CeedChk(ierr); 980 ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr); 981 982 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 983 CEED_QFUNCTION_NONE, op_restrict); 984 CeedChk(ierr); 985 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 986 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 987 CeedChk(ierr); 988 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 989 CEED_BASIS_COLLOCATED, mult_vec); 990 CeedChk(ierr); 991 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 992 CEED_VECTOR_ACTIVE); CeedChk(ierr); 993 994 // Prolongation 995 CeedQFunction qf_prolong; 996 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 997 CeedChk(ierr); 998 CeedInt *num_comp_p_data; 999 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 1000 num_comp_p_data[0] = num_comp; 1001 CeedQFunctionContext ctx_p; 1002 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 1003 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 1004 sizeof(*num_comp_p_data), num_comp_p_data); 1005 CeedChk(ierr); 1006 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 1007 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 1008 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 1009 CeedChk(ierr); 1010 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 1011 CeedChk(ierr); 1012 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 1013 CeedChk(ierr); 1014 ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr); 1015 1016 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 1017 CEED_QFUNCTION_NONE, op_prolong); 1018 CeedChk(ierr); 1019 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 1020 CEED_VECTOR_ACTIVE); CeedChk(ierr); 1021 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 1022 CEED_BASIS_COLLOCATED, mult_vec); 1023 CeedChk(ierr); 1024 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 1025 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 1026 CeedChk(ierr); 1027 1028 // Clone name 1029 bool has_name = op_fine->name; 1030 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 1031 ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr); 1032 { 1033 char *prolongation_name; 1034 ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr); 1035 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", 1036 op_fine->name); 1037 ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr); 1038 ierr = CeedFree(&prolongation_name); CeedChk(ierr); 1039 } 1040 { 1041 char *restriction_name; 1042 ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr); 1043 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", 1044 op_fine->name); 1045 ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr); 1046 ierr = CeedFree(&restriction_name); CeedChk(ierr); 1047 } 1048 1049 // Cleanup 1050 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 1051 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 1052 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 1053 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 1054 1055 return CEED_ERROR_SUCCESS; 1056 } 1057 1058 /** 1059 @brief Build 1D mass matrix and Laplacian with perturbation 1060 1061 @param[in] interp_1d Interpolation matrix in one dimension 1062 @param[in] grad_1d Gradient matrix in one dimension 1063 @param[in] q_weight_1d Quadrature weights in one dimension 1064 @param[in] P_1d Number of basis nodes in one dimension 1065 @param[in] Q_1d Number of quadrature points in one dimension 1066 @param[in] dim Dimension of basis 1067 @param[out] mass Assembled mass matrix in one dimension 1068 @param[out] laplace Assembled perturbed Laplacian in one dimension 1069 1070 @return An error code: 0 - success, otherwise - failure 1071 1072 @ref Developer 1073 **/ 1074 CeedPragmaOptimizeOff 1075 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 1076 const CeedScalar *grad_1d, 1077 const CeedScalar *q_weight_1d, CeedInt P_1d, 1078 CeedInt Q_1d, CeedInt dim, 1079 CeedScalar *mass, CeedScalar *laplace) { 1080 1081 for (CeedInt i=0; i<P_1d; i++) 1082 for (CeedInt j=0; j<P_1d; j++) { 1083 CeedScalar sum = 0.0; 1084 for (CeedInt k=0; k<Q_1d; k++) 1085 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 1086 mass[i+j*P_1d] = sum; 1087 } 1088 // -- Laplacian 1089 for (CeedInt i=0; i<P_1d; i++) 1090 for (CeedInt j=0; j<P_1d; j++) { 1091 CeedScalar sum = 0.0; 1092 for (CeedInt k=0; k<Q_1d; k++) 1093 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1094 laplace[i+j*P_1d] = sum; 1095 } 1096 CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4; 1097 for (CeedInt i=0; i<P_1d; i++) 1098 laplace[i+P_1d*i] += perturbation; 1099 return CEED_ERROR_SUCCESS; 1100 } 1101 CeedPragmaOptimizeOn 1102 1103 /// @} 1104 1105 /// ---------------------------------------------------------------------------- 1106 /// CeedOperator Backend API 1107 /// ---------------------------------------------------------------------------- 1108 /// @addtogroup CeedOperatorBackend 1109 /// @{ 1110 1111 /** 1112 @brief Create object holding CeedQFunction assembly data for CeedOperator 1113 1114 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1115 @param[out] data Address of the variable where the newly created 1116 CeedQFunctionAssemblyData will be stored 1117 1118 @return An error code: 0 - success, otherwise - failure 1119 1120 @ref Backend 1121 **/ 1122 int CeedQFunctionAssemblyDataCreate(Ceed ceed, 1123 CeedQFunctionAssemblyData *data) { 1124 int ierr; 1125 1126 ierr = CeedCalloc(1, data); CeedChk(ierr); 1127 (*data)->ref_count = 1; 1128 (*data)->ceed = ceed; 1129 ierr = CeedReference(ceed); CeedChk(ierr); 1130 1131 return CEED_ERROR_SUCCESS; 1132 } 1133 1134 /** 1135 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1136 1137 @param data CeedQFunctionAssemblyData to increment the reference counter 1138 1139 @return An error code: 0 - success, otherwise - failure 1140 1141 @ref Backend 1142 **/ 1143 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1144 data->ref_count++; 1145 return CEED_ERROR_SUCCESS; 1146 } 1147 1148 /** 1149 @brief Set re-use of CeedQFunctionAssemblyData 1150 1151 @param data CeedQFunctionAssemblyData to mark for reuse 1152 @param reuse_data Boolean flag indicating data re-use 1153 1154 @return An error code: 0 - success, otherwise - failure 1155 1156 @ref Backend 1157 **/ 1158 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, 1159 bool reuse_data) { 1160 data->reuse_data = reuse_data; 1161 data->needs_data_update = true; 1162 return CEED_ERROR_SUCCESS; 1163 } 1164 1165 /** 1166 @brief Mark QFunctionAssemblyData as stale 1167 1168 @param data CeedQFunctionAssemblyData to mark as stale 1169 @param needs_data_update Boolean flag indicating if update is needed or completed 1170 1171 @return An error code: 0 - success, otherwise - failure 1172 1173 @ref Backend 1174 **/ 1175 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, 1176 bool needs_data_update) { 1177 data->needs_data_update = needs_data_update; 1178 return CEED_ERROR_SUCCESS; 1179 } 1180 1181 /** 1182 @brief Determine if QFunctionAssemblyData needs update 1183 1184 @param[in] data CeedQFunctionAssemblyData to mark as stale 1185 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1186 1187 @return An error code: 0 - success, otherwise - failure 1188 1189 @ref Backend 1190 **/ 1191 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, 1192 bool *is_update_needed) { 1193 *is_update_needed = !data->reuse_data || data->needs_data_update; 1194 return CEED_ERROR_SUCCESS; 1195 } 1196 1197 /** 1198 @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should 1199 be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`; 1200 Note: If `*data_copy` is non-NULL, then it is assumed that 1201 `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This 1202 CeedQFunctionAssemblyData will be destroyed if `*data_copy` is 1203 the only reference to this CeedQFunctionAssemblyData. 1204 1205 @param data CeedQFunctionAssemblyData to copy reference to 1206 @param[out] data_copy Variable to store copied reference 1207 1208 @return An error code: 0 - success, otherwise - failure 1209 1210 @ref Backend 1211 **/ 1212 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, 1213 CeedQFunctionAssemblyData *data_copy) { 1214 int ierr; 1215 1216 ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr); 1217 ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr); 1218 *data_copy = data; 1219 return CEED_ERROR_SUCCESS; 1220 } 1221 1222 /** 1223 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1224 1225 @param[in] data CeedQFunctionAssemblyData to retreive status 1226 @param[out] is_setup Boolean flag for setup status 1227 1228 @return An error code: 0 - success, otherwise - failure 1229 1230 @ref Backend 1231 **/ 1232 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, 1233 bool *is_setup) { 1234 *is_setup = data->is_setup; 1235 return CEED_ERROR_SUCCESS; 1236 } 1237 1238 /** 1239 @brief Set internal objects for CeedQFunctionAssemblyData 1240 1241 @param[in] data CeedQFunctionAssemblyData to set objects 1242 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1243 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1244 1245 @return An error code: 0 - success, otherwise - failure 1246 1247 @ref Backend 1248 **/ 1249 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, 1250 CeedVector vec, CeedElemRestriction rstr) { 1251 int ierr; 1252 1253 ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr); 1254 ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr); 1255 1256 data->is_setup = true; 1257 return CEED_ERROR_SUCCESS; 1258 } 1259 1260 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, 1261 CeedVector *vec, CeedElemRestriction *rstr) { 1262 int ierr; 1263 1264 if (!data->is_setup) 1265 // LCOV_EXCL_START 1266 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, 1267 "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1268 // LCOV_EXCL_STOP 1269 1270 ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr); 1271 ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr); 1272 1273 return CEED_ERROR_SUCCESS; 1274 } 1275 1276 /** 1277 @brief Destroy CeedQFunctionAssemblyData 1278 1279 @param[out] data CeedQFunctionAssemblyData to destroy 1280 1281 @return An error code: 0 - success, otherwise - failure 1282 1283 @ref Backend 1284 **/ 1285 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1286 int ierr; 1287 1288 if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS; 1289 1290 ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr); 1291 ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr); 1292 ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr); 1293 1294 ierr = CeedFree(data); CeedChk(ierr); 1295 return CEED_ERROR_SUCCESS; 1296 } 1297 1298 /// @} 1299 1300 /// ---------------------------------------------------------------------------- 1301 /// CeedOperator Public API 1302 /// ---------------------------------------------------------------------------- 1303 /// @addtogroup CeedOperatorUser 1304 /// @{ 1305 1306 /** 1307 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1308 1309 This returns a CeedVector containing a matrix at each quadrature point 1310 providing the action of the CeedQFunction associated with the CeedOperator. 1311 The vector 'assembled' is of shape 1312 [num_elements, num_input_fields, num_output_fields, num_quad_points] 1313 and contains column-major matrices representing the action of the 1314 CeedQFunction for a corresponding quadrature point on an element. Inputs and 1315 outputs are in the order provided by the user when adding CeedOperator fields. 1316 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1317 'v', provided in that order, would result in an assembled QFunction that 1318 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1319 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1320 1321 Note: Calling this function asserts that setup is complete 1322 and sets the CeedOperator as immutable. 1323 1324 @param op CeedOperator to assemble CeedQFunction 1325 @param[out] assembled CeedVector to store assembled CeedQFunction at 1326 quadrature points 1327 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1328 CeedQFunction 1329 @param request Address of CeedRequest for non-blocking completion, else 1330 @ref CEED_REQUEST_IMMEDIATE 1331 1332 @return An error code: 0 - success, otherwise - failure 1333 1334 @ref User 1335 **/ 1336 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1337 CeedElemRestriction *rstr, 1338 CeedRequest *request) { 1339 int ierr; 1340 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1341 1342 // Backend version 1343 if (op->LinearAssembleQFunction) { 1344 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1345 CeedChk(ierr); 1346 } else { 1347 // Fallback to reference Ceed 1348 if (!op->op_fallback) { 1349 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1350 } 1351 // Assemble 1352 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1353 rstr, request); CeedChk(ierr); 1354 } 1355 return CEED_ERROR_SUCCESS; 1356 } 1357 1358 /** 1359 @brief Assemble CeedQFunction and store result internall. Return copied 1360 references of stored data to the caller. Caller is responsible for 1361 ownership and destruction of the copied references. See also 1362 @ref CeedOperatorLinearAssembleQFunction 1363 1364 @param op CeedOperator to assemble CeedQFunction 1365 @param assembled CeedVector to store assembled CeedQFunction at 1366 quadrature points 1367 @param rstr CeedElemRestriction for CeedVector containing assembled 1368 CeedQFunction 1369 @param request Address of CeedRequest for non-blocking completion, else 1370 @ref CEED_REQUEST_IMMEDIATE 1371 1372 @return An error code: 0 - success, otherwise - failure 1373 1374 @ref User 1375 **/ 1376 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, 1377 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1378 int ierr; 1379 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1380 1381 // Backend version 1382 if (op->LinearAssembleQFunctionUpdate) { 1383 bool qf_assembled_is_setup; 1384 CeedVector assembled_vec = NULL; 1385 CeedElemRestriction assembled_rstr = NULL; 1386 1387 ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, 1388 &qf_assembled_is_setup); CeedChk(ierr); 1389 if (qf_assembled_is_setup) { 1390 ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, 1391 &assembled_rstr); CeedChk(ierr); 1392 1393 bool update_needed; 1394 ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, 1395 &update_needed); CeedChk(ierr); 1396 if (update_needed) { 1397 ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, 1398 request); CeedChk(ierr); 1399 } 1400 } else { 1401 ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, 1402 request); CeedChk(ierr); 1403 ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, 1404 assembled_rstr); CeedChk(ierr); 1405 } 1406 ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false); 1407 CeedChk(ierr); 1408 1409 // Copy reference to internally held copy 1410 *assembled = NULL; 1411 *rstr = NULL; 1412 ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr); 1413 ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr); 1414 ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr); 1415 ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr); 1416 } else { 1417 // Fallback to reference Ceed 1418 if (!op->op_fallback) { 1419 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1420 } 1421 // Assemble 1422 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback, 1423 assembled, rstr, request); CeedChk(ierr); 1424 } 1425 1426 return CEED_ERROR_SUCCESS; 1427 } 1428 1429 /** 1430 @brief Assemble the diagonal of a square linear CeedOperator 1431 1432 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1433 1434 Note: Currently only non-composite CeedOperators with a single field and 1435 composite CeedOperators with single field sub-operators are supported. 1436 1437 Note: Calling this function asserts that setup is complete 1438 and sets the CeedOperator as immutable. 1439 1440 @param op CeedOperator to assemble CeedQFunction 1441 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1442 @param request Address of CeedRequest for non-blocking completion, else 1443 @ref CEED_REQUEST_IMMEDIATE 1444 1445 @return An error code: 0 - success, otherwise - failure 1446 1447 @ref User 1448 **/ 1449 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, 1450 CeedRequest *request) { 1451 int ierr; 1452 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1453 1454 CeedSize input_size = 0, output_size = 0; 1455 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1456 CeedChk(ierr); 1457 if (input_size != output_size) 1458 // LCOV_EXCL_START 1459 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1460 // LCOV_EXCL_STOP 1461 1462 // Use backend version, if available 1463 if (op->LinearAssembleDiagonal) { 1464 ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); 1465 return CEED_ERROR_SUCCESS; 1466 } else if (op->LinearAssembleAddDiagonal) { 1467 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1468 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1469 return CEED_ERROR_SUCCESS; 1470 } else { 1471 // Check for valid fallback resource 1472 const char *resource, *fallback_resource; 1473 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1474 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1475 CeedChk(ierr); 1476 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1477 // Fallback to reference Ceed 1478 if (!op->op_fallback) { 1479 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1480 } 1481 // Assemble 1482 ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request); 1483 CeedChk(ierr); 1484 return CEED_ERROR_SUCCESS; 1485 } 1486 } 1487 1488 // Default interface implementation 1489 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1490 ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); 1491 CeedChk(ierr); 1492 return CEED_ERROR_SUCCESS; 1493 } 1494 1495 /** 1496 @brief Assemble the diagonal of a square linear CeedOperator 1497 1498 This sums into a CeedVector the diagonal of a linear CeedOperator. 1499 1500 Note: Currently only non-composite CeedOperators with a single field and 1501 composite CeedOperators with single field sub-operators are supported. 1502 1503 Note: Calling this function asserts that setup is complete 1504 and sets the CeedOperator as immutable. 1505 1506 @param op CeedOperator to assemble CeedQFunction 1507 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1508 @param request Address of CeedRequest for non-blocking completion, else 1509 @ref CEED_REQUEST_IMMEDIATE 1510 1511 @return An error code: 0 - success, otherwise - failure 1512 1513 @ref User 1514 **/ 1515 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1516 CeedRequest *request) { 1517 int ierr; 1518 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1519 1520 CeedSize input_size = 0, output_size = 0; 1521 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1522 CeedChk(ierr); 1523 if (input_size != output_size) 1524 // LCOV_EXCL_START 1525 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1526 // LCOV_EXCL_STOP 1527 1528 // Use backend version, if available 1529 if (op->LinearAssembleAddDiagonal) { 1530 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1531 return CEED_ERROR_SUCCESS; 1532 } else { 1533 // Check for valid fallback resource 1534 const char *resource, *fallback_resource; 1535 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1536 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1537 CeedChk(ierr); 1538 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1539 // Fallback to reference Ceed 1540 if (!op->op_fallback) { 1541 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1542 } 1543 // Assemble 1544 ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, 1545 request); CeedChk(ierr); 1546 return CEED_ERROR_SUCCESS; 1547 } 1548 } 1549 1550 // Default interface implementation 1551 bool is_composite; 1552 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1553 if (is_composite) { 1554 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1555 false, assembled); CeedChk(ierr); 1556 return CEED_ERROR_SUCCESS; 1557 } else { 1558 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1559 CeedChk(ierr); 1560 return CEED_ERROR_SUCCESS; 1561 } 1562 } 1563 1564 /** 1565 @brief Assemble the point block diagonal of a square linear CeedOperator 1566 1567 This overwrites a CeedVector with the point block diagonal of a linear 1568 CeedOperator. 1569 1570 Note: Currently only non-composite CeedOperators with a single field and 1571 composite CeedOperators with single field sub-operators are supported. 1572 1573 Note: Calling this function asserts that setup is complete 1574 and sets the CeedOperator as immutable. 1575 1576 @param op CeedOperator to assemble CeedQFunction 1577 @param[out] assembled CeedVector to store assembled CeedOperator point block 1578 diagonal, provided in row-major form with an 1579 @a num_comp * @a num_comp block at each node. The dimensions 1580 of this vector are derived from the active vector 1581 for the CeedOperator. The array has shape 1582 [nodes, component out, component in]. 1583 @param request Address of CeedRequest for non-blocking completion, else 1584 CEED_REQUEST_IMMEDIATE 1585 1586 @return An error code: 0 - success, otherwise - failure 1587 1588 @ref User 1589 **/ 1590 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, 1591 CeedVector assembled, CeedRequest *request) { 1592 int ierr; 1593 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1594 1595 CeedSize input_size = 0, output_size = 0; 1596 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1597 CeedChk(ierr); 1598 if (input_size != output_size) 1599 // LCOV_EXCL_START 1600 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1601 // LCOV_EXCL_STOP 1602 1603 // Use backend version, if available 1604 if (op->LinearAssemblePointBlockDiagonal) { 1605 ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); 1606 CeedChk(ierr); 1607 return CEED_ERROR_SUCCESS; 1608 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1609 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1610 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, 1611 request); CeedChk(ierr); 1612 return CEED_ERROR_SUCCESS; 1613 } else { 1614 // Check for valid fallback resource 1615 const char *resource, *fallback_resource; 1616 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1617 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1618 CeedChk(ierr); 1619 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1620 // Fallback to reference Ceed 1621 if (!op->op_fallback) { 1622 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1623 } 1624 // Assemble 1625 ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback, 1626 assembled, request); CeedChk(ierr); 1627 return CEED_ERROR_SUCCESS; 1628 } 1629 } 1630 1631 // Default interface implementation 1632 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1633 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); 1634 CeedChk(ierr); 1635 return CEED_ERROR_SUCCESS; 1636 } 1637 1638 /** 1639 @brief Assemble the point block diagonal of a square linear CeedOperator 1640 1641 This sums into a CeedVector with the point block diagonal of a linear 1642 CeedOperator. 1643 1644 Note: Currently only non-composite CeedOperators with a single field and 1645 composite CeedOperators with single field sub-operators are supported. 1646 1647 Note: Calling this function asserts that setup is complete 1648 and sets the CeedOperator as immutable. 1649 1650 @param op CeedOperator to assemble CeedQFunction 1651 @param[out] assembled CeedVector to store assembled CeedOperator point block 1652 diagonal, provided in row-major form with an 1653 @a num_comp * @a num_comp block at each node. The dimensions 1654 of this vector are derived from the active vector 1655 for the CeedOperator. The array has shape 1656 [nodes, component out, component in]. 1657 @param request Address of CeedRequest for non-blocking completion, else 1658 CEED_REQUEST_IMMEDIATE 1659 1660 @return An error code: 0 - success, otherwise - failure 1661 1662 @ref User 1663 **/ 1664 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, 1665 CeedVector assembled, CeedRequest *request) { 1666 int ierr; 1667 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1668 1669 CeedSize input_size = 0, output_size = 0; 1670 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1671 CeedChk(ierr); 1672 if (input_size != output_size) 1673 // LCOV_EXCL_START 1674 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1675 // LCOV_EXCL_STOP 1676 1677 // Use backend version, if available 1678 if (op->LinearAssembleAddPointBlockDiagonal) { 1679 ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); 1680 CeedChk(ierr); 1681 return CEED_ERROR_SUCCESS; 1682 } else { 1683 // Check for valid fallback resource 1684 const char *resource, *fallback_resource; 1685 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1686 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1687 CeedChk(ierr); 1688 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1689 // Fallback to reference Ceed 1690 if (!op->op_fallback) { 1691 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1692 } 1693 // Assemble 1694 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1695 assembled, request); CeedChk(ierr); 1696 return CEED_ERROR_SUCCESS; 1697 } 1698 } 1699 1700 // Default interface implemenation 1701 bool is_composite; 1702 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1703 if (is_composite) { 1704 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1705 true, assembled); CeedChk(ierr); 1706 return CEED_ERROR_SUCCESS; 1707 } else { 1708 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1709 CeedChk(ierr); 1710 return CEED_ERROR_SUCCESS; 1711 } 1712 } 1713 1714 /** 1715 @brief Fully assemble the nonzero pattern of a linear operator. 1716 1717 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1718 1719 The assembly routines use coordinate format, with num_entries tuples of the 1720 form (i, j, value) which indicate that value should be added to the matrix 1721 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1722 This function returns the number of entries and their (i, j) locations, 1723 while CeedOperatorLinearAssemble() provides the values in the same 1724 ordering. 1725 1726 This will generally be slow unless your operator is low-order. 1727 1728 Note: Calling this function asserts that setup is complete 1729 and sets the CeedOperator as immutable. 1730 1731 @param[in] op CeedOperator to assemble 1732 @param[out] num_entries Number of entries in coordinate nonzero pattern 1733 @param[out] rows Row number for each entry 1734 @param[out] cols Column number for each entry 1735 1736 @ref User 1737 **/ 1738 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, 1739 CeedInt **rows, CeedInt **cols) { 1740 int ierr; 1741 CeedInt num_suboperators, single_entries; 1742 CeedOperator *sub_operators; 1743 bool is_composite; 1744 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1745 1746 // Use backend version, if available 1747 if (op->LinearAssembleSymbolic) { 1748 ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr); 1749 return CEED_ERROR_SUCCESS; 1750 } else { 1751 // Check for valid fallback resource 1752 const char *resource, *fallback_resource; 1753 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1754 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1755 CeedChk(ierr); 1756 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1757 // Fallback to reference Ceed 1758 if (!op->op_fallback) { 1759 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1760 } 1761 // Assemble 1762 ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows, 1763 cols); CeedChk(ierr); 1764 return CEED_ERROR_SUCCESS; 1765 } 1766 } 1767 1768 // Default interface implementation 1769 1770 // count entries and allocate rows, cols arrays 1771 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1772 *num_entries = 0; 1773 if (is_composite) { 1774 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1775 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1776 for (CeedInt k = 0; k < num_suboperators; ++k) { 1777 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1778 &single_entries); CeedChk(ierr); 1779 *num_entries += single_entries; 1780 } 1781 } else { 1782 ierr = CeedSingleOperatorAssemblyCountEntries(op, 1783 &single_entries); CeedChk(ierr); 1784 *num_entries += single_entries; 1785 } 1786 ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr); 1787 ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr); 1788 1789 // assemble nonzero locations 1790 CeedInt offset = 0; 1791 if (is_composite) { 1792 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1793 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1794 for (CeedInt k = 0; k < num_suboperators; ++k) { 1795 ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, 1796 *cols); CeedChk(ierr); 1797 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1798 &single_entries); 1799 CeedChk(ierr); 1800 offset += single_entries; 1801 } 1802 } else { 1803 ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols); 1804 CeedChk(ierr); 1805 } 1806 1807 return CEED_ERROR_SUCCESS; 1808 } 1809 1810 /** 1811 @brief Fully assemble the nonzero entries of a linear operator. 1812 1813 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic() 1814 1815 The assembly routines use coordinate format, with num_entries tuples of the 1816 form (i, j, value) which indicate that value should be added to the matrix 1817 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1818 This function returns the values of the nonzero entries to be added, their 1819 (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1820 1821 This will generally be slow unless your operator is low-order. 1822 1823 Note: Calling this function asserts that setup is complete 1824 and sets the CeedOperator as immutable. 1825 1826 @param[in] op CeedOperator to assemble 1827 @param[out] values Values to assemble into matrix 1828 1829 @ref User 1830 **/ 1831 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1832 int ierr; 1833 CeedInt num_suboperators, single_entries = 0; 1834 CeedOperator *sub_operators; 1835 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1836 1837 // Use backend version, if available 1838 if (op->LinearAssemble) { 1839 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1840 return CEED_ERROR_SUCCESS; 1841 } else { 1842 // Check for valid fallback resource 1843 const char *resource, *fallback_resource; 1844 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1845 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1846 CeedChk(ierr); 1847 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1848 // Fallback to reference Ceed 1849 if (!op->op_fallback) { 1850 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1851 } 1852 // Assemble 1853 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1854 return CEED_ERROR_SUCCESS; 1855 } 1856 } 1857 1858 // Default interface implementation 1859 bool is_composite; 1860 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1861 1862 CeedInt offset = 0; 1863 if (is_composite) { 1864 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1865 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1866 for (CeedInt k = 0; k < num_suboperators; ++k) { 1867 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1868 CeedChk(ierr); 1869 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1870 &single_entries); 1871 CeedChk(ierr); 1872 offset += single_entries; 1873 } 1874 } else { 1875 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1876 } 1877 1878 return CEED_ERROR_SUCCESS; 1879 } 1880 1881 /** 1882 @brief Create a multigrid coarse operator and level transfer operators 1883 for a CeedOperator, creating the prolongation basis from the 1884 fine and coarse grid interpolation 1885 1886 Note: Calling this function asserts that setup is complete 1887 and sets the CeedOperator as immutable. 1888 1889 @param[in] op_fine Fine grid operator 1890 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1891 @param[in] rstr_coarse Coarse grid restriction 1892 @param[in] basis_coarse Coarse grid active vector basis 1893 @param[out] op_coarse Coarse grid operator 1894 @param[out] op_prolong Coarse to fine operator 1895 @param[out] op_restrict Fine to coarse operator 1896 1897 @return An error code: 0 - success, otherwise - failure 1898 1899 @ref User 1900 **/ 1901 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1902 CeedVector p_mult_fine, 1903 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1904 CeedOperator *op_coarse, CeedOperator *op_prolong, 1905 CeedOperator *op_restrict) { 1906 int ierr; 1907 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1908 Ceed ceed; 1909 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1910 1911 // Check for compatible quadrature spaces 1912 CeedBasis basis_fine; 1913 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1914 CeedInt Q_f, Q_c; 1915 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1916 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1917 if (Q_f != Q_c) 1918 // LCOV_EXCL_START 1919 return CeedError(ceed, CEED_ERROR_DIMENSION, 1920 "Bases must have compatible quadrature spaces"); 1921 // LCOV_EXCL_STOP 1922 1923 // Coarse to fine basis 1924 CeedInt P_f, P_c, Q = Q_f; 1925 bool is_tensor_f, is_tensor_c; 1926 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1927 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1928 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1929 if (is_tensor_f && is_tensor_c) { 1930 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1931 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1932 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1933 } else if (!is_tensor_f && !is_tensor_c) { 1934 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1935 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1936 } else { 1937 // LCOV_EXCL_START 1938 return CeedError(ceed, CEED_ERROR_MINOR, 1939 "Bases must both be tensor or non-tensor"); 1940 // LCOV_EXCL_STOP 1941 } 1942 1943 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1944 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1945 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1946 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1947 const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL; 1948 if (is_tensor_f) { 1949 ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr); 1950 ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr); 1951 } else { 1952 ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr); 1953 ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr); 1954 } 1955 memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]); 1956 memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]); 1957 1958 // -- QR Factorization, interp_f = Q R 1959 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1960 1961 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1962 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1963 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1964 1965 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1966 for (CeedInt j=0; j<P_c; j++) { // Column j 1967 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1968 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1969 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1970 for (CeedInt k=i+1; k<P_f; k++) 1971 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1972 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1973 } 1974 } 1975 ierr = CeedFree(&tau); CeedChk(ierr); 1976 ierr = CeedFree(&interp_c); CeedChk(ierr); 1977 ierr = CeedFree(&interp_f); CeedChk(ierr); 1978 1979 // Complete with interp_c_to_f versions of code 1980 if (is_tensor_f) { 1981 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1982 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1983 CeedChk(ierr); 1984 } else { 1985 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1986 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1987 CeedChk(ierr); 1988 } 1989 1990 // Cleanup 1991 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1992 return CEED_ERROR_SUCCESS; 1993 } 1994 1995 /** 1996 @brief Create a multigrid coarse operator and level transfer operators 1997 for a CeedOperator with a tensor basis for the active basis 1998 1999 Note: Calling this function asserts that setup is complete 2000 and sets the CeedOperator as immutable. 2001 2002 @param[in] op_fine Fine grid operator 2003 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2004 @param[in] rstr_coarse Coarse grid restriction 2005 @param[in] basis_coarse Coarse grid active vector basis 2006 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2007 @param[out] op_coarse Coarse grid operator 2008 @param[out] op_prolong Coarse to fine operator 2009 @param[out] op_restrict Fine to coarse operator 2010 2011 @return An error code: 0 - success, otherwise - failure 2012 2013 @ref User 2014 **/ 2015 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 2016 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2017 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 2018 CeedOperator *op_prolong, CeedOperator *op_restrict) { 2019 int ierr; 2020 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2021 Ceed ceed; 2022 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2023 2024 // Check for compatible quadrature spaces 2025 CeedBasis basis_fine; 2026 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2027 CeedInt Q_f, Q_c; 2028 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2029 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2030 if (Q_f != Q_c) 2031 // LCOV_EXCL_START 2032 return CeedError(ceed, CEED_ERROR_DIMENSION, 2033 "Bases must have compatible quadrature spaces"); 2034 // LCOV_EXCL_STOP 2035 2036 // Coarse to fine basis 2037 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2038 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2039 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2040 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 2041 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2042 CeedChk(ierr); 2043 P_1d_c = dim == 1 ? num_nodes_c : 2044 dim == 2 ? sqrt(num_nodes_c) : 2045 cbrt(num_nodes_c); 2046 CeedScalar *q_ref, *q_weight, *grad; 2047 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 2048 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 2049 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 2050 CeedBasis basis_c_to_f; 2051 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 2052 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2053 CeedChk(ierr); 2054 ierr = CeedFree(&q_ref); CeedChk(ierr); 2055 ierr = CeedFree(&q_weight); CeedChk(ierr); 2056 ierr = CeedFree(&grad); CeedChk(ierr); 2057 2058 // Core code 2059 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2060 basis_coarse, basis_c_to_f, op_coarse, 2061 op_prolong, op_restrict); 2062 CeedChk(ierr); 2063 return CEED_ERROR_SUCCESS; 2064 } 2065 2066 /** 2067 @brief Create a multigrid coarse operator and level transfer operators 2068 for a CeedOperator with a non-tensor basis for the active vector 2069 2070 Note: Calling this function asserts that setup is complete 2071 and sets the CeedOperator as immutable. 2072 2073 @param[in] op_fine Fine grid operator 2074 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2075 @param[in] rstr_coarse Coarse grid restriction 2076 @param[in] basis_coarse Coarse grid active vector basis 2077 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2078 @param[out] op_coarse Coarse grid operator 2079 @param[out] op_prolong Coarse to fine operator 2080 @param[out] op_restrict Fine to coarse operator 2081 2082 @return An error code: 0 - success, otherwise - failure 2083 2084 @ref User 2085 **/ 2086 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 2087 CeedVector p_mult_fine, 2088 CeedElemRestriction rstr_coarse, 2089 CeedBasis basis_coarse, 2090 const CeedScalar *interp_c_to_f, 2091 CeedOperator *op_coarse, 2092 CeedOperator *op_prolong, 2093 CeedOperator *op_restrict) { 2094 int ierr; 2095 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2096 Ceed ceed; 2097 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2098 2099 // Check for compatible quadrature spaces 2100 CeedBasis basis_fine; 2101 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2102 CeedInt Q_f, Q_c; 2103 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2104 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2105 if (Q_f != Q_c) 2106 // LCOV_EXCL_START 2107 return CeedError(ceed, CEED_ERROR_DIMENSION, 2108 "Bases must have compatible quadrature spaces"); 2109 // LCOV_EXCL_STOP 2110 2111 // Coarse to fine basis 2112 CeedElemTopology topo; 2113 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 2114 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2115 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2116 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2117 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 2118 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2119 CeedChk(ierr); 2120 CeedScalar *q_ref, *q_weight, *grad; 2121 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 2122 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 2123 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 2124 CeedBasis basis_c_to_f; 2125 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 2126 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2127 CeedChk(ierr); 2128 ierr = CeedFree(&q_ref); CeedChk(ierr); 2129 ierr = CeedFree(&q_weight); CeedChk(ierr); 2130 ierr = CeedFree(&grad); CeedChk(ierr); 2131 2132 // Core code 2133 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2134 basis_coarse, basis_c_to_f, op_coarse, 2135 op_prolong, op_restrict); 2136 CeedChk(ierr); 2137 return CEED_ERROR_SUCCESS; 2138 } 2139 2140 /** 2141 @brief Build a FDM based approximate inverse for each element for a 2142 CeedOperator 2143 2144 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2145 Method based approximate inverse. This function obtains the simultaneous 2146 diagonalization for the 1D mass and Laplacian operators, 2147 M = V^T V, K = V^T S V. 2148 The assembled QFunction is used to modify the eigenvalues from simultaneous 2149 diagonalization and obtain an approximate inverse of the form 2150 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2151 associated CeedQFunction must therefore also be linear. 2152 2153 Note: Calling this function asserts that setup is complete 2154 and sets the CeedOperator as immutable. 2155 2156 @param op CeedOperator to create element inverses 2157 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2158 for each element 2159 @param request Address of CeedRequest for non-blocking completion, else 2160 @ref CEED_REQUEST_IMMEDIATE 2161 2162 @return An error code: 0 - success, otherwise - failure 2163 2164 @ref User 2165 **/ 2166 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2167 CeedRequest *request) { 2168 int ierr; 2169 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2170 2171 // Use backend version, if available 2172 if (op->CreateFDMElementInverse) { 2173 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2174 return CEED_ERROR_SUCCESS; 2175 } else { 2176 // Check for valid fallback resource 2177 const char *resource, *fallback_resource; 2178 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2179 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2180 CeedChk(ierr); 2181 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2182 // Fallback to reference Ceed 2183 if (!op->op_fallback) { 2184 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2185 } 2186 // Assemble 2187 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2188 CeedChk(ierr); 2189 return CEED_ERROR_SUCCESS; 2190 } 2191 } 2192 2193 // Interface implementation 2194 Ceed ceed, ceed_parent; 2195 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2196 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2197 ceed_parent = ceed_parent ? ceed_parent : ceed; 2198 CeedQFunction qf; 2199 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2200 2201 // Determine active input basis 2202 bool interp = false, grad = false; 2203 CeedBasis basis = NULL; 2204 CeedElemRestriction rstr = NULL; 2205 CeedOperatorField *op_fields; 2206 CeedQFunctionField *qf_fields; 2207 CeedInt num_input_fields; 2208 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2209 CeedChk(ierr); 2210 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2211 for (CeedInt i=0; i<num_input_fields; i++) { 2212 CeedVector vec; 2213 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2214 if (vec == CEED_VECTOR_ACTIVE) { 2215 CeedEvalMode eval_mode; 2216 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2217 interp = interp || eval_mode == CEED_EVAL_INTERP; 2218 grad = grad || eval_mode == CEED_EVAL_GRAD; 2219 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2220 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2221 } 2222 } 2223 if (!basis) 2224 // LCOV_EXCL_START 2225 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2226 // LCOV_EXCL_STOP 2227 CeedSize l_size = 1; 2228 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2229 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2230 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2231 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2232 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2233 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2234 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2235 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2236 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2237 2238 // Build and diagonalize 1D Mass and Laplacian 2239 bool tensor_basis; 2240 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2241 if (!tensor_basis) 2242 // LCOV_EXCL_START 2243 return CeedError(ceed, CEED_ERROR_BACKEND, 2244 "FDMElementInverse only supported for tensor " 2245 "bases"); 2246 // LCOV_EXCL_STOP 2247 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2248 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2249 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2250 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2251 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2252 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2253 // -- Build matrices 2254 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2255 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2256 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2257 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2258 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2259 mass, laplace); CeedChk(ierr); 2260 2261 // -- Diagonalize 2262 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2263 CeedChk(ierr); 2264 ierr = CeedFree(&mass); CeedChk(ierr); 2265 ierr = CeedFree(&laplace); CeedChk(ierr); 2266 for (CeedInt i=0; i<P_1d; i++) 2267 for (CeedInt j=0; j<P_1d; j++) 2268 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2269 ierr = CeedFree(&x); CeedChk(ierr); 2270 2271 // Assemble QFunction 2272 CeedVector assembled; 2273 CeedElemRestriction rstr_qf; 2274 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2275 &rstr_qf, request); CeedChk(ierr); 2276 CeedInt layout[3]; 2277 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2278 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2279 CeedScalar max_norm = 0; 2280 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2281 2282 // Calculate element averages 2283 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2284 CeedScalar *elem_avg; 2285 const CeedScalar *assembled_array, *q_weight_array; 2286 CeedVector q_weight; 2287 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2288 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2289 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2290 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2291 CeedChk(ierr); 2292 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2293 CeedChk(ierr); 2294 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2295 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2296 for (CeedInt e=0; e<num_elem; e++) { 2297 CeedInt count = 0; 2298 for (CeedInt q=0; q<num_qpts; q++) 2299 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2300 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2301 qf_value_bound) { 2302 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2303 q_weight_array[q]; 2304 count++; 2305 } 2306 if (count) { 2307 elem_avg[e] /= count; 2308 } else { 2309 elem_avg[e] = 1.0; 2310 } 2311 } 2312 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2313 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2314 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2315 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2316 2317 // Build FDM diagonal 2318 CeedVector q_data; 2319 CeedScalar *q_data_array, *fdm_diagonal; 2320 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2321 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2322 for (CeedInt c=0; c<num_comp; c++) 2323 for (CeedInt n=0; n<elem_size; n++) { 2324 if (interp) 2325 fdm_diagonal[c*elem_size + n] = 1.0; 2326 if (grad) 2327 for (CeedInt d=0; d<dim; d++) { 2328 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2329 fdm_diagonal[c*elem_size + n] += lambda[i]; 2330 } 2331 if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound) 2332 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2333 } 2334 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2335 CeedChk(ierr); 2336 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2337 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2338 CeedChk(ierr); 2339 for (CeedInt e=0; e<num_elem; e++) 2340 for (CeedInt c=0; c<num_comp; c++) 2341 for (CeedInt n=0; n<elem_size; n++) 2342 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2343 fdm_diagonal[c*elem_size + n]); 2344 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2345 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2346 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2347 2348 // Setup FDM operator 2349 // -- Basis 2350 CeedBasis fdm_basis; 2351 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2352 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2353 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2354 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2355 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2356 fdm_interp, grad_dummy, q_ref_dummy, 2357 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2358 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2359 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2360 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2361 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2362 ierr = CeedFree(&lambda); CeedChk(ierr); 2363 2364 // -- Restriction 2365 CeedElemRestriction rstr_qd_i; 2366 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2367 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2368 num_comp, num_elem*num_comp*elem_size, 2369 strides, &rstr_qd_i); CeedChk(ierr); 2370 // -- QFunction 2371 CeedQFunction qf_fdm; 2372 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2373 CeedChk(ierr); 2374 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2375 CeedChk(ierr); 2376 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2377 CeedChk(ierr); 2378 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2379 CeedChk(ierr); 2380 ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr); 2381 // -- QFunction context 2382 CeedInt *num_comp_data; 2383 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2384 num_comp_data[0] = num_comp; 2385 CeedQFunctionContext ctx_fdm; 2386 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2387 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2388 sizeof(*num_comp_data), num_comp_data); 2389 CeedChk(ierr); 2390 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2391 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2392 // -- Operator 2393 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2394 CeedChk(ierr); 2395 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2396 CeedChk(ierr); 2397 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2398 q_data); CeedChk(ierr); 2399 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2400 CeedChk(ierr); 2401 2402 // Cleanup 2403 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2404 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2405 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2406 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2407 2408 return CEED_ERROR_SUCCESS; 2409 } 2410 2411 /// @} 2412