1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedOperator preconditioning interfaces 18 19 /// ---------------------------------------------------------------------------- 20 /// CeedOperator Library Internal Preconditioning Functions 21 /// ---------------------------------------------------------------------------- 22 /// @addtogroup CeedOperatorDeveloper 23 /// @{ 24 25 /** 26 @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced 27 CeedOperator functionality 28 29 @param[in] qf CeedQFunction to create fallback for 30 @param[out] fallback_qf fallback CeedQFunction 31 32 @return An error code: 0 - success, otherwise - failure 33 34 @ref Developer 35 **/ 36 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, 37 CeedQFunction *qf_fallback) { 38 int ierr; 39 40 // Check if NULL qf passed in 41 if (!qf) return CEED_ERROR_SUCCESS; 42 43 CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 44 CeedDebug256(qf->ceed, 255, "Creating fallback CeedQFunction\n"); 45 46 char *source_path_with_name = ""; 47 if (qf->source_path) { 48 size_t path_len = strlen(qf->source_path), 49 name_len = strlen(qf->kernel_name); 50 ierr = CeedCalloc(path_len + name_len + 2, &source_path_with_name); 51 CeedChk(ierr); 52 memcpy(source_path_with_name, qf->source_path, path_len); 53 memcpy(&source_path_with_name[path_len], ":", 1); 54 memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); 55 } else { 56 ierr = CeedCalloc(1, &source_path_with_name); CeedChk(ierr); 57 } 58 59 ierr = CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, 60 qf->function, source_path_with_name, 61 qf_fallback); CeedChk(ierr); 62 { 63 CeedQFunctionContext ctx; 64 65 ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr); 66 ierr = CeedQFunctionSetContext(*qf_fallback, ctx); CeedChk(ierr); 67 } 68 for (CeedInt i = 0; i < qf->num_input_fields; i++) { 69 ierr = CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, 70 qf->input_fields[i]->size, 71 qf->input_fields[i]->eval_mode); CeedChk(ierr); 72 } 73 for (CeedInt i = 0; i < qf->num_output_fields; i++) { 74 ierr = CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, 75 qf->output_fields[i]->size, 76 qf->output_fields[i]->eval_mode); CeedChk(ierr); 77 } 78 ierr = CeedFree(&source_path_with_name); CeedChk(ierr); 79 80 return CEED_ERROR_SUCCESS; 81 } 82 83 /** 84 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 85 CeedOperator functionality 86 87 @param op CeedOperator to create fallback for 88 89 @return An error code: 0 - success, otherwise - failure 90 91 @ref Developer 92 **/ 93 static int CeedOperatorCreateFallback(CeedOperator op) { 94 int ierr; 95 Ceed ceed_fallback; 96 97 // Check not already created 98 if (op->op_fallback) return CEED_ERROR_SUCCESS; 99 100 // Fallback Ceed 101 ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr); 102 if (!ceed_fallback) return CEED_ERROR_SUCCESS; 103 104 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 105 CeedDebug256(op->ceed, 255, "Creating fallback CeedOperator\n"); 106 107 // Clone Op 108 CeedOperator op_fallback; 109 if (op->is_composite) { 110 ierr = CeedCompositeOperatorCreate(ceed_fallback, &op_fallback); 111 CeedChk(ierr); 112 for (CeedInt i = 0; i < op->num_suboperators; i++) { 113 CeedOperator op_sub_fallback; 114 115 ierr = CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback); 116 CeedChk(ierr); 117 ierr = CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback); CeedChk(ierr); 118 } 119 } else { 120 CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; 121 ierr = CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback); 122 CeedChk(ierr); 123 ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback); 124 CeedChk(ierr); 125 ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback); 126 CeedChk(ierr); 127 ierr = CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, 128 dqfT_fallback, &op_fallback); CeedChk(ierr); 129 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 130 ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, 131 op->input_fields[i]->elem_restr, 132 op->input_fields[i]->basis, 133 op->input_fields[i]->vec); CeedChk(ierr); 134 } 135 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 136 ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, 137 op->output_fields[i]->elem_restr, 138 op->output_fields[i]->basis, 139 op->output_fields[i]->vec); CeedChk(ierr); 140 } 141 ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 142 &op_fallback->qf_assembled); CeedChk(ierr); 143 if (op_fallback->num_qpts == 0) { 144 ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts); 145 CeedChk(ierr); 146 } 147 // Cleanup 148 ierr = CeedQFunctionDestroy(&qf_fallback); CeedChk(ierr); 149 ierr = CeedQFunctionDestroy(&dqf_fallback); CeedChk(ierr); 150 ierr = CeedQFunctionDestroy(&dqfT_fallback); CeedChk(ierr); 151 } 152 ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr); 153 ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr); 154 op->op_fallback = op_fallback; 155 156 return CEED_ERROR_SUCCESS; 157 } 158 159 /** 160 @brief Retreive fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality 161 162 @param[in] op CeedOperator to retrieve fallback for 163 @param[out] op_fallback Fallback CeedOperator 164 165 @return An error code: 0 - success, otherwise - failure 166 167 @ref Developer 168 **/ 169 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { 170 int ierr; 171 172 // Create if needed 173 if (!op->op_fallback) { 174 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 175 } 176 if (op->op_fallback) { 177 bool is_debug; 178 179 ierr = CeedIsDebug(op->ceed, &is_debug); CeedChk(ierr); 180 if (is_debug) { 181 Ceed ceed_fallback; 182 const char *resource, *resource_fallback; 183 184 ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr); 185 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 186 ierr = CeedGetResource(ceed_fallback, &resource_fallback); CeedChk(ierr); 187 188 CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); 189 CeedDebug256(op->ceed, 255, 190 "Falling back from %s operator at address %ld to %s operator at address %ld\n", 191 resource, op, resource_fallback, op->op_fallback); 192 } 193 } 194 *op_fallback = op->op_fallback; 195 196 return CEED_ERROR_SUCCESS; 197 } 198 199 /** 200 @brief Select correct basis matrix pointer based on CeedEvalMode 201 202 @param[in] eval_mode Current basis evaluation mode 203 @param[in] identity Pointer to identity matrix 204 @param[in] interp Pointer to interpolation matrix 205 @param[in] grad Pointer to gradient matrix 206 @param[out] basis_ptr Basis pointer to set 207 208 @ref Developer 209 **/ 210 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 211 const CeedScalar *identity, const CeedScalar *interp, 212 const CeedScalar *grad, const CeedScalar **basis_ptr) { 213 switch (eval_mode) { 214 case CEED_EVAL_NONE: 215 *basis_ptr = identity; 216 break; 217 case CEED_EVAL_INTERP: 218 *basis_ptr = interp; 219 break; 220 case CEED_EVAL_GRAD: 221 *basis_ptr = grad; 222 break; 223 case CEED_EVAL_WEIGHT: 224 case CEED_EVAL_DIV: 225 case CEED_EVAL_CURL: 226 break; // Caught by QF Assembly 227 } 228 } 229 230 /** 231 @brief Create point block restriction for active operator field 232 233 @param[in] rstr Original CeedElemRestriction for active field 234 @param[out] pointblock_rstr Address of the variable where the newly created 235 CeedElemRestriction will be stored 236 237 @return An error code: 0 - success, otherwise - failure 238 239 @ref Developer 240 **/ 241 static int CeedOperatorCreateActivePointBlockRestriction( 242 CeedElemRestriction rstr, 243 CeedElemRestriction *pointblock_rstr) { 244 int ierr; 245 Ceed ceed; 246 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 247 const CeedInt *offsets; 248 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 249 CeedChk(ierr); 250 251 // Expand offsets 252 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 253 *pointblock_offsets; 254 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 255 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 256 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 257 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 258 CeedInt shift = num_comp; 259 if (comp_stride != 1) 260 shift *= num_comp; 261 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 262 CeedChk(ierr); 263 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 264 pointblock_offsets[i] = offsets[i]*shift; 265 if (pointblock_offsets[i] > max) 266 max = pointblock_offsets[i]; 267 } 268 269 // Create new restriction 270 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 271 1, max + num_comp*num_comp, CEED_MEM_HOST, 272 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 273 CeedChk(ierr); 274 275 // Cleanup 276 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 277 278 return CEED_ERROR_SUCCESS; 279 } 280 281 /** 282 @brief Core logic for assembling operator diagonal or point block diagonal 283 284 @param[in] op CeedOperator to assemble point block diagonal 285 @param[in] request Address of CeedRequest for non-blocking completion, else 286 CEED_REQUEST_IMMEDIATE 287 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 288 @param[out] assembled CeedVector to store assembled diagonal 289 290 @return An error code: 0 - success, otherwise - failure 291 292 @ref Developer 293 **/ 294 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 295 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 296 int ierr; 297 Ceed ceed; 298 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 299 300 // Assemble QFunction 301 CeedQFunction qf; 302 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 303 CeedInt num_input_fields, num_output_fields; 304 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 305 CeedChk(ierr); 306 CeedVector assembled_qf; 307 CeedElemRestriction rstr; 308 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 309 &rstr, request); CeedChk(ierr); 310 CeedInt layout[3]; 311 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 312 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 313 CeedScalar max_norm = 0; 314 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 315 316 // Determine active input basis 317 CeedOperatorField *op_fields; 318 CeedQFunctionField *qf_fields; 319 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 320 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 321 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 322 CeedEvalMode *eval_mode_in = NULL; 323 CeedBasis basis_in = NULL; 324 CeedElemRestriction rstr_in = NULL; 325 for (CeedInt i=0; i<num_input_fields; i++) { 326 CeedVector vec; 327 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 328 if (vec == CEED_VECTOR_ACTIVE) { 329 CeedElemRestriction rstr; 330 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 331 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 332 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 333 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 334 if (rstr_in && rstr_in != rstr) 335 // LCOV_EXCL_START 336 return CeedError(ceed, CEED_ERROR_BACKEND, 337 "Multi-field non-composite operator diagonal assembly not supported"); 338 // LCOV_EXCL_STOP 339 rstr_in = rstr; 340 CeedEvalMode eval_mode; 341 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 342 switch (eval_mode) { 343 case CEED_EVAL_NONE: 344 case CEED_EVAL_INTERP: 345 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 346 eval_mode_in[num_eval_mode_in] = eval_mode; 347 num_eval_mode_in += 1; 348 break; 349 case CEED_EVAL_GRAD: 350 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 351 for (CeedInt d=0; d<dim; d++) 352 eval_mode_in[num_eval_mode_in+d] = eval_mode; 353 num_eval_mode_in += dim; 354 break; 355 case CEED_EVAL_WEIGHT: 356 case CEED_EVAL_DIV: 357 case CEED_EVAL_CURL: 358 break; // Caught by QF Assembly 359 } 360 } 361 } 362 363 // Determine active output basis 364 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 365 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 366 CeedInt num_eval_mode_out = 0; 367 CeedEvalMode *eval_mode_out = NULL; 368 CeedBasis basis_out = NULL; 369 CeedElemRestriction rstr_out = NULL; 370 for (CeedInt i=0; i<num_output_fields; i++) { 371 CeedVector vec; 372 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 373 if (vec == CEED_VECTOR_ACTIVE) { 374 CeedElemRestriction rstr; 375 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 376 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 377 if (rstr_out && rstr_out != rstr) 378 // LCOV_EXCL_START 379 return CeedError(ceed, CEED_ERROR_BACKEND, 380 "Multi-field non-composite operator diagonal assembly not supported"); 381 // LCOV_EXCL_STOP 382 rstr_out = rstr; 383 CeedEvalMode eval_mode; 384 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 385 switch (eval_mode) { 386 case CEED_EVAL_NONE: 387 case CEED_EVAL_INTERP: 388 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 389 eval_mode_out[num_eval_mode_out] = eval_mode; 390 num_eval_mode_out += 1; 391 break; 392 case CEED_EVAL_GRAD: 393 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 394 for (CeedInt d=0; d<dim; d++) 395 eval_mode_out[num_eval_mode_out+d] = eval_mode; 396 num_eval_mode_out += dim; 397 break; 398 case CEED_EVAL_WEIGHT: 399 case CEED_EVAL_DIV: 400 case CEED_EVAL_CURL: 401 break; // Caught by QF Assembly 402 } 403 } 404 } 405 406 // Assemble point block diagonal restriction, if needed 407 CeedElemRestriction diag_rstr = rstr_out; 408 if (is_pointblock) { 409 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 410 CeedChk(ierr); 411 } 412 413 // Create diagonal vector 414 CeedVector elem_diag; 415 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 416 CeedChk(ierr); 417 418 // Assemble element operator diagonals 419 CeedScalar *elem_diag_array; 420 const CeedScalar *assembled_qf_array; 421 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 422 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 423 CeedChk(ierr); 424 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 425 CeedChk(ierr); 426 CeedInt num_elem, num_nodes, num_qpts; 427 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 428 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 429 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 430 // Basis matrices 431 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 432 CeedScalar *identity = NULL; 433 bool evalNone = false; 434 for (CeedInt i=0; i<num_eval_mode_in; i++) 435 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 436 for (CeedInt i=0; i<num_eval_mode_out; i++) 437 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 438 if (evalNone) { 439 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 440 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 441 identity[i*num_nodes+i] = 1.0; 442 } 443 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 444 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 445 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 446 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 447 // Compute the diagonal of B^T D B 448 // Each element 449 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 450 for (CeedInt e=0; e<num_elem; e++) { 451 CeedInt d_out = -1; 452 // Each basis eval mode pair 453 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 454 const CeedScalar *bt = NULL; 455 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 456 d_out += 1; 457 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 458 &grad_out[d_out*num_qpts*num_nodes], &bt); 459 CeedInt d_in = -1; 460 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 461 const CeedScalar *b = NULL; 462 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 463 d_in += 1; 464 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 465 &grad_in[d_in*num_qpts*num_nodes], &b); 466 // Each component 467 for (CeedInt c_out=0; c_out<num_comp; c_out++) 468 // Each qpoint/node pair 469 for (CeedInt q=0; q<num_qpts; q++) 470 if (is_pointblock) { 471 // Point Block Diagonal 472 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 473 const CeedScalar qf_value = 474 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 475 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 476 if (fabs(qf_value) > qf_value_bound) 477 for (CeedInt n=0; n<num_nodes; n++) 478 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 479 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 480 } 481 } else { 482 // Diagonal Only 483 const CeedScalar qf_value = 484 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 485 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 486 if (fabs(qf_value) > qf_value_bound) 487 for (CeedInt n=0; n<num_nodes; n++) 488 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 489 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 490 } 491 } 492 } 493 } 494 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 495 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 496 CeedChk(ierr); 497 498 // Assemble local operator diagonal 499 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 500 assembled, request); CeedChk(ierr); 501 502 // Cleanup 503 if (is_pointblock) { 504 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 505 } 506 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 507 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 508 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 509 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 510 ierr = CeedFree(&identity); CeedChk(ierr); 511 512 return CEED_ERROR_SUCCESS; 513 } 514 515 /** 516 @brief Core logic for assembling composite operator diagonal 517 518 @param[in] op CeedOperator to assemble point block diagonal 519 @param[in] request Address of CeedRequest for non-blocking completion, else 520 CEED_REQUEST_IMMEDIATE 521 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 522 @param[out] assembled CeedVector to store assembled diagonal 523 524 @return An error code: 0 - success, otherwise - failure 525 526 @ref Developer 527 **/ 528 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 529 CeedOperator op, CeedRequest *request, const bool is_pointblock, 530 CeedVector assembled) { 531 int ierr; 532 CeedInt num_sub; 533 CeedOperator *suboperators; 534 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 535 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 536 for (CeedInt i = 0; i < num_sub; i++) { 537 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 538 is_pointblock, assembled); CeedChk(ierr); 539 } 540 return CEED_ERROR_SUCCESS; 541 } 542 543 /** 544 @brief Build nonzero pattern for non-composite operator 545 546 Users should generally use CeedOperatorLinearAssembleSymbolic() 547 548 @param[in] op CeedOperator to assemble nonzero pattern 549 @param[in] offset Offset for number of entries 550 @param[out] rows Row number for each entry 551 @param[out] cols Column number for each entry 552 553 @return An error code: 0 - success, otherwise - failure 554 555 @ref Developer 556 **/ 557 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 558 CeedInt *rows, CeedInt *cols) { 559 int ierr; 560 Ceed ceed = op->ceed; 561 if (op->is_composite) 562 // LCOV_EXCL_START 563 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 564 "Composite operator not supported"); 565 // LCOV_EXCL_STOP 566 567 CeedSize num_nodes; 568 ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr); 569 CeedElemRestriction rstr_in; 570 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 571 CeedInt num_elem, elem_size, num_comp; 572 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 573 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 574 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 575 CeedInt layout_er[3]; 576 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 577 578 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 579 580 // Determine elem_dof relation 581 CeedVector index_vec; 582 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 583 CeedScalar *array; 584 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 585 for (CeedInt i = 0; i < num_nodes; ++i) { 586 array[i] = i; 587 } 588 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 589 CeedVector elem_dof; 590 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 591 CeedChk(ierr); 592 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 593 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 594 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 595 const CeedScalar *elem_dof_a; 596 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 597 CeedChk(ierr); 598 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 599 600 // Determine i, j locations for element matrices 601 CeedInt count = 0; 602 for (CeedInt e = 0; e < num_elem; ++e) { 603 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 604 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 605 for (CeedInt i = 0; i < elem_size; ++i) { 606 for (CeedInt j = 0; j < elem_size; ++j) { 607 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 608 (comp_out)*layout_er[1] + e*layout_er[2]; 609 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 610 (comp_in)*layout_er[1] + e*layout_er[2]; 611 612 const CeedInt row = elem_dof_a[elem_dof_index_row]; 613 const CeedInt col = elem_dof_a[elem_dof_index_col]; 614 615 rows[offset + count] = row; 616 cols[offset + count] = col; 617 count++; 618 } 619 } 620 } 621 } 622 } 623 if (count != local_num_entries) 624 // LCOV_EXCL_START 625 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 626 // LCOV_EXCL_STOP 627 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 628 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 629 630 return CEED_ERROR_SUCCESS; 631 } 632 633 /** 634 @brief Assemble nonzero entries for non-composite operator 635 636 Users should generally use CeedOperatorLinearAssemble() 637 638 @param[in] op CeedOperator to assemble 639 @param[out] offset Offest for number of entries 640 @param[out] values Values to assemble into matrix 641 642 @return An error code: 0 - success, otherwise - failure 643 644 @ref Developer 645 **/ 646 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 647 CeedVector values) { 648 int ierr; 649 Ceed ceed = op->ceed; 650 if (op->is_composite) 651 // LCOV_EXCL_START 652 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 653 "Composite operator not supported"); 654 // LCOV_EXCL_STOP 655 656 // Assemble QFunction 657 CeedQFunction qf; 658 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 659 CeedVector assembled_qf; 660 CeedElemRestriction rstr_q; 661 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 662 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 663 664 CeedSize qf_length; 665 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 666 667 CeedInt num_input_fields, num_output_fields; 668 CeedOperatorField *input_fields; 669 CeedOperatorField *output_fields; 670 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 671 &num_output_fields, &output_fields); CeedChk(ierr); 672 673 // Determine active input basis 674 CeedQFunctionField *qf_fields; 675 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 676 CeedInt num_eval_mode_in = 0, dim = 1; 677 CeedEvalMode *eval_mode_in = NULL; 678 CeedBasis basis_in = NULL; 679 CeedElemRestriction rstr_in = NULL; 680 for (CeedInt i=0; i<num_input_fields; i++) { 681 CeedVector vec; 682 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 683 if (vec == CEED_VECTOR_ACTIVE) { 684 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 685 CeedChk(ierr); 686 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 687 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 688 CeedChk(ierr); 689 CeedEvalMode eval_mode; 690 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 691 CeedChk(ierr); 692 switch (eval_mode) { 693 case CEED_EVAL_NONE: 694 case CEED_EVAL_INTERP: 695 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 696 eval_mode_in[num_eval_mode_in] = eval_mode; 697 num_eval_mode_in += 1; 698 break; 699 case CEED_EVAL_GRAD: 700 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 701 for (CeedInt d=0; d<dim; d++) { 702 eval_mode_in[num_eval_mode_in+d] = eval_mode; 703 } 704 num_eval_mode_in += dim; 705 break; 706 case CEED_EVAL_WEIGHT: 707 case CEED_EVAL_DIV: 708 case CEED_EVAL_CURL: 709 break; // Caught by QF Assembly 710 } 711 } 712 } 713 714 // Determine active output basis 715 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 716 CeedInt num_eval_mode_out = 0; 717 CeedEvalMode *eval_mode_out = NULL; 718 CeedBasis basis_out = NULL; 719 CeedElemRestriction rstr_out = NULL; 720 for (CeedInt i=0; i<num_output_fields; i++) { 721 CeedVector vec; 722 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 723 if (vec == CEED_VECTOR_ACTIVE) { 724 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 725 CeedChk(ierr); 726 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 727 CeedChk(ierr); 728 CeedEvalMode eval_mode; 729 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 730 CeedChk(ierr); 731 switch (eval_mode) { 732 case CEED_EVAL_NONE: 733 case CEED_EVAL_INTERP: 734 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 735 eval_mode_out[num_eval_mode_out] = eval_mode; 736 num_eval_mode_out += 1; 737 break; 738 case CEED_EVAL_GRAD: 739 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 740 for (CeedInt d=0; d<dim; d++) { 741 eval_mode_out[num_eval_mode_out+d] = eval_mode; 742 } 743 num_eval_mode_out += dim; 744 break; 745 case CEED_EVAL_WEIGHT: 746 case CEED_EVAL_DIV: 747 case CEED_EVAL_CURL: 748 break; // Caught by QF Assembly 749 } 750 } 751 } 752 753 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 754 // LCOV_EXCL_START 755 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 756 "Cannot assemble operator with out inputs/outputs"); 757 // LCOV_EXCL_STOP 758 759 CeedInt num_elem, elem_size, num_qpts, num_comp; 760 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 761 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 762 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 763 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 764 765 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 766 767 // loop over elements and put in data structure 768 const CeedScalar *interp_in, *grad_in; 769 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 770 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 771 772 const CeedScalar *assembled_qf_array; 773 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 774 CeedChk(ierr); 775 776 CeedInt layout_qf[3]; 777 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 778 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 779 780 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 781 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 782 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 783 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 784 num_qpts]; // logically 3-tensor 785 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 786 CeedScalar elem_mat[elem_size * elem_size]; 787 CeedInt count = 0; 788 CeedScalar *vals; 789 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 790 for (CeedInt e = 0; e < num_elem; ++e) { 791 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 792 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 793 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 794 B_mat_in[ell] = 0.0; 795 } 796 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 797 B_mat_out[ell] = 0.0; 798 } 799 // Store block-diagonal D matrix as collection of small dense blocks 800 for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; 801 ++ell) { 802 D_mat[ell] = 0.0; 803 } 804 // form element matrix itself (for each block component) 805 for (CeedInt ell = 0; ell < elem_size*elem_size; ++ell) { 806 elem_mat[ell] = 0.0; 807 } 808 for (CeedInt q = 0; q < num_qpts; ++q) { 809 for (CeedInt n = 0; n < elem_size; ++n) { 810 CeedInt d_in = -1; 811 for (CeedInt e_in = 0; e_in < num_eval_mode_in; ++e_in) { 812 const CeedInt qq = num_eval_mode_in*q; 813 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 814 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 815 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 816 d_in += 1; 817 B_mat_in[(qq+e_in)*elem_size + n] += 818 grad_in[(d_in*num_qpts+q) * elem_size + n]; 819 } else { 820 // LCOV_EXCL_START 821 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 822 // LCOV_EXCL_STOP 823 } 824 } 825 CeedInt d_out = -1; 826 for (CeedInt e_out = 0; e_out < num_eval_mode_out; ++e_out) { 827 const CeedInt qq = num_eval_mode_out*q; 828 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 829 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 830 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 831 d_out += 1; 832 B_mat_out[(qq+e_out)*elem_size + n] += 833 grad_in[(d_out*num_qpts+q) * elem_size + n]; 834 } else { 835 // LCOV_EXCL_START 836 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 837 // LCOV_EXCL_STOP 838 } 839 } 840 } 841 for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) { 842 for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) { 843 const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej) 844 *num_comp 845 +comp_out; 846 const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 847 e*layout_qf[2]; 848 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 849 } 850 } 851 } 852 // Compute B^T*D 853 for (CeedInt ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 854 BTD[ell] = 0.0; 855 } 856 for (CeedInt j = 0; j<elem_size; ++j) { 857 for (CeedInt q = 0; q<num_qpts; ++q) { 858 const CeedInt qq = num_eval_mode_out*q; 859 for (CeedInt ei = 0; ei < num_eval_mode_in; ++ei) { 860 for (CeedInt ej = 0; ej < num_eval_mode_out; ++ej) { 861 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 862 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 863 } 864 } 865 } 866 } 867 868 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 869 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 870 871 // put element matrix in coordinate data structure 872 for (CeedInt i = 0; i < elem_size; ++i) { 873 for (CeedInt j = 0; j < elem_size; ++j) { 874 vals[offset + count] = elem_mat[i*elem_size + j]; 875 count++; 876 } 877 } 878 } 879 } 880 } 881 if (count != local_num_entries) 882 // LCOV_EXCL_START 883 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 884 // LCOV_EXCL_STOP 885 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 886 887 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 888 CeedChk(ierr); 889 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 890 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 891 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 892 893 return CEED_ERROR_SUCCESS; 894 } 895 896 /** 897 @brief Count number of entries for assembled CeedOperator 898 899 @param[in] op CeedOperator to assemble 900 @param[out] num_entries Number of entries in assembled representation 901 902 @return An error code: 0 - success, otherwise - failure 903 904 @ref Utility 905 **/ 906 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 907 CeedInt *num_entries) { 908 int ierr; 909 CeedElemRestriction rstr; 910 CeedInt num_elem, elem_size, num_comp; 911 912 if (op->is_composite) 913 // LCOV_EXCL_START 914 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 915 "Composite operator not supported"); 916 // LCOV_EXCL_STOP 917 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 918 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 919 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 920 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 921 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 922 923 return CEED_ERROR_SUCCESS; 924 } 925 926 /** 927 @brief Common code for creating a multigrid coarse operator and level 928 transfer operators for a CeedOperator 929 930 @param[in] op_fine Fine grid operator 931 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 932 @param[in] rstr_coarse Coarse grid restriction 933 @param[in] basis_coarse Coarse grid active vector basis 934 @param[in] basis_c_to_f Basis for coarse to fine interpolation 935 @param[out] op_coarse Coarse grid operator 936 @param[out] op_prolong Coarse to fine operator 937 @param[out] op_restrict Fine to coarse operator 938 939 @return An error code: 0 - success, otherwise - failure 940 941 @ref Developer 942 **/ 943 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 944 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 945 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 946 CeedOperator *op_restrict) { 947 int ierr; 948 Ceed ceed; 949 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 950 951 // Check for composite operator 952 bool is_composite; 953 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 954 if (is_composite) 955 // LCOV_EXCL_START 956 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 957 "Automatic multigrid setup for composite operators not supported"); 958 // LCOV_EXCL_STOP 959 960 // Coarse Grid 961 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 962 op_coarse); CeedChk(ierr); 963 CeedElemRestriction rstr_fine = NULL; 964 // -- Clone input fields 965 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 966 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 967 rstr_fine = op_fine->input_fields[i]->elem_restr; 968 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 969 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 970 CeedChk(ierr); 971 } else { 972 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 973 op_fine->input_fields[i]->elem_restr, 974 op_fine->input_fields[i]->basis, 975 op_fine->input_fields[i]->vec); CeedChk(ierr); 976 } 977 } 978 // -- Clone output fields 979 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 980 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 981 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 982 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 983 CeedChk(ierr); 984 } else { 985 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 986 op_fine->output_fields[i]->elem_restr, 987 op_fine->output_fields[i]->basis, 988 op_fine->output_fields[i]->vec); CeedChk(ierr); 989 } 990 } 991 // -- Clone QFunctionAssemblyData 992 ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, 993 &(*op_coarse)->qf_assembled); CeedChk(ierr); 994 995 // Multiplicity vector 996 CeedVector mult_vec, mult_e_vec; 997 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 998 CeedChk(ierr); 999 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 1000 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 1001 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 1002 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 1003 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 1004 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 1005 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 1006 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 1007 1008 // Restriction 1009 CeedInt num_comp; 1010 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr); 1011 CeedQFunction qf_restrict; 1012 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 1013 CeedChk(ierr); 1014 CeedInt *num_comp_r_data; 1015 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 1016 num_comp_r_data[0] = num_comp; 1017 CeedQFunctionContext ctx_r; 1018 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 1019 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 1020 sizeof(*num_comp_r_data), num_comp_r_data); 1021 CeedChk(ierr); 1022 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 1023 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 1024 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 1025 CeedChk(ierr); 1026 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 1027 CeedChk(ierr); 1028 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 1029 CEED_EVAL_INTERP); CeedChk(ierr); 1030 ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr); 1031 1032 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 1033 CEED_QFUNCTION_NONE, op_restrict); 1034 CeedChk(ierr); 1035 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 1036 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 1037 CeedChk(ierr); 1038 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 1039 CEED_BASIS_COLLOCATED, mult_vec); 1040 CeedChk(ierr); 1041 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 1042 CEED_VECTOR_ACTIVE); CeedChk(ierr); 1043 1044 // Prolongation 1045 CeedQFunction qf_prolong; 1046 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 1047 CeedChk(ierr); 1048 CeedInt *num_comp_p_data; 1049 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 1050 num_comp_p_data[0] = num_comp; 1051 CeedQFunctionContext ctx_p; 1052 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 1053 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 1054 sizeof(*num_comp_p_data), num_comp_p_data); 1055 CeedChk(ierr); 1056 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 1057 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 1058 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 1059 CeedChk(ierr); 1060 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 1061 CeedChk(ierr); 1062 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 1063 CeedChk(ierr); 1064 ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr); 1065 1066 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 1067 CEED_QFUNCTION_NONE, op_prolong); 1068 CeedChk(ierr); 1069 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 1070 CEED_VECTOR_ACTIVE); CeedChk(ierr); 1071 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 1072 CEED_BASIS_COLLOCATED, mult_vec); 1073 CeedChk(ierr); 1074 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 1075 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 1076 CeedChk(ierr); 1077 1078 // Clone name 1079 bool has_name = op_fine->name; 1080 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 1081 ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr); 1082 { 1083 char *prolongation_name; 1084 ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr); 1085 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", 1086 op_fine->name); 1087 ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr); 1088 ierr = CeedFree(&prolongation_name); CeedChk(ierr); 1089 } 1090 { 1091 char *restriction_name; 1092 ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr); 1093 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", 1094 op_fine->name); 1095 ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr); 1096 ierr = CeedFree(&restriction_name); CeedChk(ierr); 1097 } 1098 1099 // Cleanup 1100 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 1101 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 1102 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 1103 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 1104 1105 return CEED_ERROR_SUCCESS; 1106 } 1107 1108 /** 1109 @brief Build 1D mass matrix and Laplacian with perturbation 1110 1111 @param[in] interp_1d Interpolation matrix in one dimension 1112 @param[in] grad_1d Gradient matrix in one dimension 1113 @param[in] q_weight_1d Quadrature weights in one dimension 1114 @param[in] P_1d Number of basis nodes in one dimension 1115 @param[in] Q_1d Number of quadrature points in one dimension 1116 @param[in] dim Dimension of basis 1117 @param[out] mass Assembled mass matrix in one dimension 1118 @param[out] laplace Assembled perturbed Laplacian in one dimension 1119 1120 @return An error code: 0 - success, otherwise - failure 1121 1122 @ref Developer 1123 **/ 1124 CeedPragmaOptimizeOff 1125 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 1126 const CeedScalar *grad_1d, 1127 const CeedScalar *q_weight_1d, CeedInt P_1d, 1128 CeedInt Q_1d, CeedInt dim, 1129 CeedScalar *mass, CeedScalar *laplace) { 1130 1131 for (CeedInt i=0; i<P_1d; i++) 1132 for (CeedInt j=0; j<P_1d; j++) { 1133 CeedScalar sum = 0.0; 1134 for (CeedInt k=0; k<Q_1d; k++) 1135 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 1136 mass[i+j*P_1d] = sum; 1137 } 1138 // -- Laplacian 1139 for (CeedInt i=0; i<P_1d; i++) 1140 for (CeedInt j=0; j<P_1d; j++) { 1141 CeedScalar sum = 0.0; 1142 for (CeedInt k=0; k<Q_1d; k++) 1143 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1144 laplace[i+j*P_1d] = sum; 1145 } 1146 CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4; 1147 for (CeedInt i=0; i<P_1d; i++) 1148 laplace[i+P_1d*i] += perturbation; 1149 return CEED_ERROR_SUCCESS; 1150 } 1151 CeedPragmaOptimizeOn 1152 1153 /// @} 1154 1155 /// ---------------------------------------------------------------------------- 1156 /// CeedOperator Backend API 1157 /// ---------------------------------------------------------------------------- 1158 /// @addtogroup CeedOperatorBackend 1159 /// @{ 1160 1161 /** 1162 @brief Create object holding CeedQFunction assembly data for CeedOperator 1163 1164 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1165 @param[out] data Address of the variable where the newly created 1166 CeedQFunctionAssemblyData will be stored 1167 1168 @return An error code: 0 - success, otherwise - failure 1169 1170 @ref Backend 1171 **/ 1172 int CeedQFunctionAssemblyDataCreate(Ceed ceed, 1173 CeedQFunctionAssemblyData *data) { 1174 int ierr; 1175 1176 ierr = CeedCalloc(1, data); CeedChk(ierr); 1177 (*data)->ref_count = 1; 1178 (*data)->ceed = ceed; 1179 ierr = CeedReference(ceed); CeedChk(ierr); 1180 1181 return CEED_ERROR_SUCCESS; 1182 } 1183 1184 /** 1185 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1186 1187 @param data CeedQFunctionAssemblyData to increment the reference counter 1188 1189 @return An error code: 0 - success, otherwise - failure 1190 1191 @ref Backend 1192 **/ 1193 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1194 data->ref_count++; 1195 return CEED_ERROR_SUCCESS; 1196 } 1197 1198 /** 1199 @brief Set re-use of CeedQFunctionAssemblyData 1200 1201 @param data CeedQFunctionAssemblyData to mark for reuse 1202 @param reuse_data Boolean flag indicating data re-use 1203 1204 @return An error code: 0 - success, otherwise - failure 1205 1206 @ref Backend 1207 **/ 1208 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, 1209 bool reuse_data) { 1210 data->reuse_data = reuse_data; 1211 data->needs_data_update = true; 1212 return CEED_ERROR_SUCCESS; 1213 } 1214 1215 /** 1216 @brief Mark QFunctionAssemblyData as stale 1217 1218 @param data CeedQFunctionAssemblyData to mark as stale 1219 @param needs_data_update Boolean flag indicating if update is needed or completed 1220 1221 @return An error code: 0 - success, otherwise - failure 1222 1223 @ref Backend 1224 **/ 1225 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, 1226 bool needs_data_update) { 1227 data->needs_data_update = needs_data_update; 1228 return CEED_ERROR_SUCCESS; 1229 } 1230 1231 /** 1232 @brief Determine if QFunctionAssemblyData needs update 1233 1234 @param[in] data CeedQFunctionAssemblyData to mark as stale 1235 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1236 1237 @return An error code: 0 - success, otherwise - failure 1238 1239 @ref Backend 1240 **/ 1241 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, 1242 bool *is_update_needed) { 1243 *is_update_needed = !data->reuse_data || data->needs_data_update; 1244 return CEED_ERROR_SUCCESS; 1245 } 1246 1247 /** 1248 @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should 1249 be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`; 1250 Note: If `*data_copy` is non-NULL, then it is assumed that 1251 `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This 1252 CeedQFunctionAssemblyData will be destroyed if `*data_copy` is 1253 the only reference to this CeedQFunctionAssemblyData. 1254 1255 @param data CeedQFunctionAssemblyData to copy reference to 1256 @param[out] data_copy Variable to store copied reference 1257 1258 @return An error code: 0 - success, otherwise - failure 1259 1260 @ref Backend 1261 **/ 1262 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, 1263 CeedQFunctionAssemblyData *data_copy) { 1264 int ierr; 1265 1266 ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr); 1267 ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr); 1268 *data_copy = data; 1269 return CEED_ERROR_SUCCESS; 1270 } 1271 1272 /** 1273 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1274 1275 @param[in] data CeedQFunctionAssemblyData to retreive status 1276 @param[out] is_setup Boolean flag for setup status 1277 1278 @return An error code: 0 - success, otherwise - failure 1279 1280 @ref Backend 1281 **/ 1282 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, 1283 bool *is_setup) { 1284 *is_setup = data->is_setup; 1285 return CEED_ERROR_SUCCESS; 1286 } 1287 1288 /** 1289 @brief Set internal objects for CeedQFunctionAssemblyData 1290 1291 @param[in] data CeedQFunctionAssemblyData to set objects 1292 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1293 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1294 1295 @return An error code: 0 - success, otherwise - failure 1296 1297 @ref Backend 1298 **/ 1299 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, 1300 CeedVector vec, CeedElemRestriction rstr) { 1301 int ierr; 1302 1303 ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr); 1304 ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr); 1305 1306 data->is_setup = true; 1307 return CEED_ERROR_SUCCESS; 1308 } 1309 1310 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, 1311 CeedVector *vec, CeedElemRestriction *rstr) { 1312 int ierr; 1313 1314 if (!data->is_setup) 1315 // LCOV_EXCL_START 1316 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, 1317 "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1318 // LCOV_EXCL_STOP 1319 1320 ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr); 1321 ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr); 1322 1323 return CEED_ERROR_SUCCESS; 1324 } 1325 1326 /** 1327 @brief Destroy CeedQFunctionAssemblyData 1328 1329 @param[out] data CeedQFunctionAssemblyData to destroy 1330 1331 @return An error code: 0 - success, otherwise - failure 1332 1333 @ref Backend 1334 **/ 1335 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1336 int ierr; 1337 1338 if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS; 1339 1340 ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr); 1341 ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr); 1342 ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr); 1343 1344 ierr = CeedFree(data); CeedChk(ierr); 1345 return CEED_ERROR_SUCCESS; 1346 } 1347 1348 /// @} 1349 1350 /// ---------------------------------------------------------------------------- 1351 /// CeedOperator Public API 1352 /// ---------------------------------------------------------------------------- 1353 /// @addtogroup CeedOperatorUser 1354 /// @{ 1355 1356 /** 1357 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1358 1359 This returns a CeedVector containing a matrix at each quadrature point 1360 providing the action of the CeedQFunction associated with the CeedOperator. 1361 The vector 'assembled' is of shape 1362 [num_elements, num_input_fields, num_output_fields, num_quad_points] 1363 and contains column-major matrices representing the action of the 1364 CeedQFunction for a corresponding quadrature point on an element. Inputs and 1365 outputs are in the order provided by the user when adding CeedOperator fields. 1366 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1367 'v', provided in that order, would result in an assembled QFunction that 1368 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1369 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1370 1371 Note: Calling this function asserts that setup is complete 1372 and sets the CeedOperator as immutable. 1373 1374 @param op CeedOperator to assemble CeedQFunction 1375 @param[out] assembled CeedVector to store assembled CeedQFunction at 1376 quadrature points 1377 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1378 CeedQFunction 1379 @param request Address of CeedRequest for non-blocking completion, else 1380 @ref CEED_REQUEST_IMMEDIATE 1381 1382 @return An error code: 0 - success, otherwise - failure 1383 1384 @ref User 1385 **/ 1386 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1387 CeedElemRestriction *rstr, 1388 CeedRequest *request) { 1389 int ierr; 1390 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1391 1392 if (op->LinearAssembleQFunction) { 1393 // Backend version 1394 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1395 CeedChk(ierr); 1396 } else { 1397 // Operator fallback 1398 CeedOperator op_fallback; 1399 1400 ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); 1401 if (op_fallback) { 1402 ierr = CeedOperatorLinearAssembleQFunction(op_fallback, assembled, 1403 rstr, request); CeedChk(ierr); 1404 } else { 1405 // LCOV_EXCL_START 1406 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 1407 "Backend does not support CeedOperatorLinearAssembleQFunction"); 1408 // LCOV_EXCL_STOP 1409 } 1410 } 1411 return CEED_ERROR_SUCCESS; 1412 } 1413 1414 /** 1415 @brief Assemble CeedQFunction and store result internall. Return copied 1416 references of stored data to the caller. Caller is responsible for 1417 ownership and destruction of the copied references. See also 1418 @ref CeedOperatorLinearAssembleQFunction 1419 1420 @param op CeedOperator to assemble CeedQFunction 1421 @param assembled CeedVector to store assembled CeedQFunction at 1422 quadrature points 1423 @param rstr CeedElemRestriction for CeedVector containing assembled 1424 CeedQFunction 1425 @param request Address of CeedRequest for non-blocking completion, else 1426 @ref CEED_REQUEST_IMMEDIATE 1427 1428 @return An error code: 0 - success, otherwise - failure 1429 1430 @ref User 1431 **/ 1432 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, 1433 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1434 int ierr; 1435 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1436 1437 if (op->LinearAssembleQFunctionUpdate) { 1438 // Backend version 1439 bool qf_assembled_is_setup; 1440 CeedVector assembled_vec = NULL; 1441 CeedElemRestriction assembled_rstr = NULL; 1442 1443 ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, 1444 &qf_assembled_is_setup); CeedChk(ierr); 1445 if (qf_assembled_is_setup) { 1446 bool update_needed; 1447 1448 ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, 1449 &assembled_rstr); CeedChk(ierr); 1450 ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, 1451 &update_needed); CeedChk(ierr); 1452 if (update_needed) { 1453 ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, 1454 request); CeedChk(ierr); 1455 } 1456 } else { 1457 ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, 1458 request); CeedChk(ierr); 1459 ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, 1460 assembled_rstr); CeedChk(ierr); 1461 } 1462 ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false); 1463 CeedChk(ierr); 1464 1465 // Copy reference from internally held copy 1466 *assembled = NULL; 1467 *rstr = NULL; 1468 ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr); 1469 ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr); 1470 ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr); 1471 ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr); 1472 } else { 1473 // Operator fallback 1474 CeedOperator op_fallback; 1475 1476 ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); 1477 if (op_fallback) { 1478 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, 1479 rstr, request); CeedChk(ierr); 1480 } else { 1481 // LCOV_EXCL_START 1482 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 1483 "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); 1484 // LCOV_EXCL_STOP 1485 } 1486 } 1487 1488 return CEED_ERROR_SUCCESS; 1489 } 1490 1491 /** 1492 @brief Assemble the diagonal of a square linear CeedOperator 1493 1494 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1495 1496 Note: Currently only non-composite CeedOperators with a single field and 1497 composite CeedOperators with single field sub-operators are supported. 1498 1499 Note: Calling this function asserts that setup is complete 1500 and sets the CeedOperator as immutable. 1501 1502 @param op CeedOperator to assemble CeedQFunction 1503 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1504 @param request Address of CeedRequest for non-blocking completion, else 1505 @ref CEED_REQUEST_IMMEDIATE 1506 1507 @return An error code: 0 - success, otherwise - failure 1508 1509 @ref User 1510 **/ 1511 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, 1512 CeedRequest *request) { 1513 int ierr; 1514 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1515 1516 CeedSize input_size = 0, output_size = 0; 1517 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1518 CeedChk(ierr); 1519 if (input_size != output_size) 1520 // LCOV_EXCL_START 1521 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1522 // LCOV_EXCL_STOP 1523 1524 if (op->LinearAssembleDiagonal) { 1525 // Backend version 1526 ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); 1527 return CEED_ERROR_SUCCESS; 1528 } else if (op->LinearAssembleAddDiagonal) { 1529 // Backend version with zeroing first 1530 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1531 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1532 return CEED_ERROR_SUCCESS; 1533 } else { 1534 // Operator fallback 1535 CeedOperator op_fallback; 1536 1537 ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); 1538 if (op_fallback) { 1539 ierr = CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request); 1540 CeedChk(ierr); 1541 return CEED_ERROR_SUCCESS; 1542 } 1543 } 1544 // Default interface implementation 1545 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1546 ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); 1547 CeedChk(ierr); 1548 1549 return CEED_ERROR_SUCCESS; 1550 } 1551 1552 /** 1553 @brief Assemble the diagonal of a square linear CeedOperator 1554 1555 This sums into a CeedVector the diagonal of a linear CeedOperator. 1556 1557 Note: Currently only non-composite CeedOperators with a single field and 1558 composite CeedOperators with single field sub-operators are supported. 1559 1560 Note: Calling this function asserts that setup is complete 1561 and sets the CeedOperator as immutable. 1562 1563 @param op CeedOperator to assemble CeedQFunction 1564 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1565 @param request Address of CeedRequest for non-blocking completion, else 1566 @ref CEED_REQUEST_IMMEDIATE 1567 1568 @return An error code: 0 - success, otherwise - failure 1569 1570 @ref User 1571 **/ 1572 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1573 CeedRequest *request) { 1574 int ierr; 1575 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1576 1577 CeedSize input_size = 0, output_size = 0; 1578 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1579 CeedChk(ierr); 1580 if (input_size != output_size) 1581 // LCOV_EXCL_START 1582 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1583 // LCOV_EXCL_STOP 1584 1585 if (op->LinearAssembleAddDiagonal) { 1586 // Backend version 1587 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1588 return CEED_ERROR_SUCCESS; 1589 } else { 1590 // Operator fallback 1591 CeedOperator op_fallback; 1592 1593 ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); 1594 if (op_fallback) { 1595 ierr = CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request); 1596 CeedChk(ierr); 1597 return CEED_ERROR_SUCCESS; 1598 } 1599 } 1600 // Default interface implementation 1601 bool is_composite; 1602 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1603 if (is_composite) { 1604 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1605 false, assembled); CeedChk(ierr); 1606 } else { 1607 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1608 CeedChk(ierr); 1609 } 1610 1611 return CEED_ERROR_SUCCESS; 1612 } 1613 1614 /** 1615 @brief Assemble the point block diagonal of a square linear CeedOperator 1616 1617 This overwrites a CeedVector with the point block diagonal of a linear 1618 CeedOperator. 1619 1620 Note: Currently only non-composite CeedOperators with a single field and 1621 composite CeedOperators with single field sub-operators are supported. 1622 1623 Note: Calling this function asserts that setup is complete 1624 and sets the CeedOperator as immutable. 1625 1626 @param op CeedOperator to assemble CeedQFunction 1627 @param[out] assembled CeedVector to store assembled CeedOperator point block 1628 diagonal, provided in row-major form with an 1629 @a num_comp * @a num_comp block at each node. The dimensions 1630 of this vector are derived from the active vector 1631 for the CeedOperator. The array has shape 1632 [nodes, component out, component in]. 1633 @param request Address of CeedRequest for non-blocking completion, else 1634 CEED_REQUEST_IMMEDIATE 1635 1636 @return An error code: 0 - success, otherwise - failure 1637 1638 @ref User 1639 **/ 1640 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, 1641 CeedVector assembled, CeedRequest *request) { 1642 int ierr; 1643 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1644 1645 CeedSize input_size = 0, output_size = 0; 1646 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1647 CeedChk(ierr); 1648 if (input_size != output_size) 1649 // LCOV_EXCL_START 1650 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1651 // LCOV_EXCL_STOP 1652 1653 if (op->LinearAssemblePointBlockDiagonal) { 1654 // Backend version 1655 ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); 1656 CeedChk(ierr); 1657 return CEED_ERROR_SUCCESS; 1658 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1659 // Backend version with zeroing first 1660 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1661 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, 1662 request); CeedChk(ierr); 1663 return CEED_ERROR_SUCCESS; 1664 } else { 1665 // Operator fallback 1666 CeedOperator op_fallback; 1667 1668 ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); 1669 if (op_fallback) { 1670 ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, 1671 request); CeedChk(ierr); 1672 return CEED_ERROR_SUCCESS; 1673 } 1674 } 1675 // Default interface implementation 1676 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1677 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); 1678 CeedChk(ierr); 1679 1680 return CEED_ERROR_SUCCESS; 1681 } 1682 1683 /** 1684 @brief Assemble the point block diagonal of a square linear CeedOperator 1685 1686 This sums into a CeedVector with the point block diagonal of a linear 1687 CeedOperator. 1688 1689 Note: Currently only non-composite CeedOperators with a single field and 1690 composite CeedOperators with single field sub-operators are supported. 1691 1692 Note: Calling this function asserts that setup is complete 1693 and sets the CeedOperator as immutable. 1694 1695 @param op CeedOperator to assemble CeedQFunction 1696 @param[out] assembled CeedVector to store assembled CeedOperator point block 1697 diagonal, provided in row-major form with an 1698 @a num_comp * @a num_comp block at each node. The dimensions 1699 of this vector are derived from the active vector 1700 for the CeedOperator. The array has shape 1701 [nodes, component out, component in]. 1702 @param request Address of CeedRequest for non-blocking completion, else 1703 CEED_REQUEST_IMMEDIATE 1704 1705 @return An error code: 0 - success, otherwise - failure 1706 1707 @ref User 1708 **/ 1709 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, 1710 CeedVector assembled, CeedRequest *request) { 1711 int ierr; 1712 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1713 1714 CeedSize input_size = 0, output_size = 0; 1715 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1716 CeedChk(ierr); 1717 if (input_size != output_size) 1718 // LCOV_EXCL_START 1719 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1720 // LCOV_EXCL_STOP 1721 1722 if (op->LinearAssembleAddPointBlockDiagonal) { 1723 // Backend version 1724 ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); 1725 CeedChk(ierr); 1726 return CEED_ERROR_SUCCESS; 1727 } else { 1728 // Operator fallback 1729 CeedOperator op_fallback; 1730 1731 ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); 1732 if (op_fallback) { 1733 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, 1734 request); CeedChk(ierr); 1735 return CEED_ERROR_SUCCESS; 1736 } 1737 } 1738 // Default interface implemenation 1739 bool is_composite; 1740 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1741 if (is_composite) { 1742 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1743 true, assembled); CeedChk(ierr); 1744 } else { 1745 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1746 CeedChk(ierr); 1747 } 1748 1749 return CEED_ERROR_SUCCESS; 1750 } 1751 1752 /** 1753 @brief Fully assemble the nonzero pattern of a linear operator. 1754 1755 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1756 1757 The assembly routines use coordinate format, with num_entries tuples of the 1758 form (i, j, value) which indicate that value should be added to the matrix 1759 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1760 This function returns the number of entries and their (i, j) locations, 1761 while CeedOperatorLinearAssemble() provides the values in the same 1762 ordering. 1763 1764 This will generally be slow unless your operator is low-order. 1765 1766 Note: Calling this function asserts that setup is complete 1767 and sets the CeedOperator as immutable. 1768 1769 @param[in] op CeedOperator to assemble 1770 @param[out] num_entries Number of entries in coordinate nonzero pattern 1771 @param[out] rows Row number for each entry 1772 @param[out] cols Column number for each entry 1773 1774 @ref User 1775 **/ 1776 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, 1777 CeedInt **rows, CeedInt **cols) { 1778 int ierr; 1779 CeedInt num_suboperators, single_entries; 1780 CeedOperator *sub_operators; 1781 bool is_composite; 1782 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1783 1784 if (op->LinearAssembleSymbolic) { 1785 // Backend version 1786 ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr); 1787 return CEED_ERROR_SUCCESS; 1788 } else { 1789 // Operator fallback 1790 CeedOperator op_fallback; 1791 1792 ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); 1793 if (op_fallback) { 1794 ierr = CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols); 1795 CeedChk(ierr); 1796 return CEED_ERROR_SUCCESS; 1797 } 1798 } 1799 1800 // Default interface implementation 1801 1802 // count entries and allocate rows, cols arrays 1803 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1804 *num_entries = 0; 1805 if (is_composite) { 1806 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1807 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1808 for (CeedInt k = 0; k < num_suboperators; ++k) { 1809 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1810 &single_entries); CeedChk(ierr); 1811 *num_entries += single_entries; 1812 } 1813 } else { 1814 ierr = CeedSingleOperatorAssemblyCountEntries(op, 1815 &single_entries); CeedChk(ierr); 1816 *num_entries += single_entries; 1817 } 1818 ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr); 1819 ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr); 1820 1821 // assemble nonzero locations 1822 CeedInt offset = 0; 1823 if (is_composite) { 1824 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1825 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1826 for (CeedInt k = 0; k < num_suboperators; ++k) { 1827 ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, 1828 *cols); CeedChk(ierr); 1829 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1830 &single_entries); 1831 CeedChk(ierr); 1832 offset += single_entries; 1833 } 1834 } else { 1835 ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols); 1836 CeedChk(ierr); 1837 } 1838 1839 return CEED_ERROR_SUCCESS; 1840 } 1841 1842 /** 1843 @brief Fully assemble the nonzero entries of a linear operator. 1844 1845 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic() 1846 1847 The assembly routines use coordinate format, with num_entries tuples of the 1848 form (i, j, value) which indicate that value should be added to the matrix 1849 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1850 This function returns the values of the nonzero entries to be added, their 1851 (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1852 1853 This will generally be slow unless your operator is low-order. 1854 1855 Note: Calling this function asserts that setup is complete 1856 and sets the CeedOperator as immutable. 1857 1858 @param[in] op CeedOperator to assemble 1859 @param[out] values Values to assemble into matrix 1860 1861 @ref User 1862 **/ 1863 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1864 int ierr; 1865 CeedInt num_suboperators, single_entries = 0; 1866 CeedOperator *sub_operators; 1867 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1868 1869 if (op->LinearAssemble) { 1870 // Backend version 1871 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1872 return CEED_ERROR_SUCCESS; 1873 } else { 1874 // Operator fallback 1875 CeedOperator op_fallback; 1876 1877 ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); 1878 if (op_fallback) { 1879 ierr = CeedOperatorLinearAssemble(op_fallback, values); CeedChk(ierr); 1880 return CEED_ERROR_SUCCESS; 1881 } 1882 } 1883 1884 // Default interface implementation 1885 bool is_composite; 1886 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1887 1888 CeedInt offset = 0; 1889 if (is_composite) { 1890 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1891 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1892 for (CeedInt k = 0; k < num_suboperators; ++k) { 1893 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1894 CeedChk(ierr); 1895 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1896 &single_entries); 1897 CeedChk(ierr); 1898 offset += single_entries; 1899 } 1900 } else { 1901 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1902 } 1903 1904 return CEED_ERROR_SUCCESS; 1905 } 1906 1907 /** 1908 @brief Create a multigrid coarse operator and level transfer operators 1909 for a CeedOperator, creating the prolongation basis from the 1910 fine and coarse grid interpolation 1911 1912 Note: Calling this function asserts that setup is complete 1913 and sets the CeedOperator as immutable. 1914 1915 @param[in] op_fine Fine grid operator 1916 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1917 @param[in] rstr_coarse Coarse grid restriction 1918 @param[in] basis_coarse Coarse grid active vector basis 1919 @param[out] op_coarse Coarse grid operator 1920 @param[out] op_prolong Coarse to fine operator 1921 @param[out] op_restrict Fine to coarse operator 1922 1923 @return An error code: 0 - success, otherwise - failure 1924 1925 @ref User 1926 **/ 1927 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1928 CeedVector p_mult_fine, 1929 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1930 CeedOperator *op_coarse, CeedOperator *op_prolong, 1931 CeedOperator *op_restrict) { 1932 int ierr; 1933 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1934 Ceed ceed; 1935 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1936 1937 // Check for compatible quadrature spaces 1938 CeedBasis basis_fine; 1939 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1940 CeedInt Q_f, Q_c; 1941 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1942 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1943 if (Q_f != Q_c) 1944 // LCOV_EXCL_START 1945 return CeedError(ceed, CEED_ERROR_DIMENSION, 1946 "Bases must have compatible quadrature spaces"); 1947 // LCOV_EXCL_STOP 1948 1949 // Coarse to fine basis 1950 CeedInt P_f, P_c, Q = Q_f; 1951 bool is_tensor_f, is_tensor_c; 1952 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1953 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1954 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1955 if (is_tensor_f && is_tensor_c) { 1956 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1957 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1958 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1959 } else if (!is_tensor_f && !is_tensor_c) { 1960 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1961 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1962 } else { 1963 // LCOV_EXCL_START 1964 return CeedError(ceed, CEED_ERROR_MINOR, 1965 "Bases must both be tensor or non-tensor"); 1966 // LCOV_EXCL_STOP 1967 } 1968 1969 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1970 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1971 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1972 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1973 const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL; 1974 if (is_tensor_f) { 1975 ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr); 1976 ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr); 1977 } else { 1978 ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr); 1979 ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr); 1980 } 1981 memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]); 1982 memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]); 1983 1984 // -- QR Factorization, interp_f = Q R 1985 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1986 1987 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1988 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1989 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1990 1991 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1992 for (CeedInt j=0; j<P_c; j++) { // Column j 1993 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1994 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1995 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1996 for (CeedInt k=i+1; k<P_f; k++) 1997 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1998 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1999 } 2000 } 2001 ierr = CeedFree(&tau); CeedChk(ierr); 2002 ierr = CeedFree(&interp_c); CeedChk(ierr); 2003 ierr = CeedFree(&interp_f); CeedChk(ierr); 2004 2005 // Complete with interp_c_to_f versions of code 2006 if (is_tensor_f) { 2007 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 2008 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 2009 CeedChk(ierr); 2010 } else { 2011 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 2012 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 2013 CeedChk(ierr); 2014 } 2015 2016 // Cleanup 2017 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 2018 return CEED_ERROR_SUCCESS; 2019 } 2020 2021 /** 2022 @brief Create a multigrid coarse operator and level transfer operators 2023 for a CeedOperator with a tensor basis for the active basis 2024 2025 Note: Calling this function asserts that setup is complete 2026 and sets the CeedOperator as immutable. 2027 2028 @param[in] op_fine Fine grid operator 2029 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2030 @param[in] rstr_coarse Coarse grid restriction 2031 @param[in] basis_coarse Coarse grid active vector basis 2032 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2033 @param[out] op_coarse Coarse grid operator 2034 @param[out] op_prolong Coarse to fine operator 2035 @param[out] op_restrict Fine to coarse operator 2036 2037 @return An error code: 0 - success, otherwise - failure 2038 2039 @ref User 2040 **/ 2041 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 2042 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 2043 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 2044 CeedOperator *op_prolong, CeedOperator *op_restrict) { 2045 int ierr; 2046 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2047 Ceed ceed; 2048 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2049 2050 // Check for compatible quadrature spaces 2051 CeedBasis basis_fine; 2052 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2053 CeedInt Q_f, Q_c; 2054 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2055 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2056 if (Q_f != Q_c) 2057 // LCOV_EXCL_START 2058 return CeedError(ceed, CEED_ERROR_DIMENSION, 2059 "Bases must have compatible quadrature spaces"); 2060 // LCOV_EXCL_STOP 2061 2062 // Coarse to fine basis 2063 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 2064 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2065 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2066 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 2067 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2068 CeedChk(ierr); 2069 P_1d_c = dim == 1 ? num_nodes_c : 2070 dim == 2 ? sqrt(num_nodes_c) : 2071 cbrt(num_nodes_c); 2072 CeedScalar *q_ref, *q_weight, *grad; 2073 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 2074 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 2075 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 2076 CeedBasis basis_c_to_f; 2077 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 2078 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2079 CeedChk(ierr); 2080 ierr = CeedFree(&q_ref); CeedChk(ierr); 2081 ierr = CeedFree(&q_weight); CeedChk(ierr); 2082 ierr = CeedFree(&grad); CeedChk(ierr); 2083 2084 // Core code 2085 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2086 basis_coarse, basis_c_to_f, op_coarse, 2087 op_prolong, op_restrict); 2088 CeedChk(ierr); 2089 return CEED_ERROR_SUCCESS; 2090 } 2091 2092 /** 2093 @brief Create a multigrid coarse operator and level transfer operators 2094 for a CeedOperator with a non-tensor basis for the active vector 2095 2096 Note: Calling this function asserts that setup is complete 2097 and sets the CeedOperator as immutable. 2098 2099 @param[in] op_fine Fine grid operator 2100 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2101 @param[in] rstr_coarse Coarse grid restriction 2102 @param[in] basis_coarse Coarse grid active vector basis 2103 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2104 @param[out] op_coarse Coarse grid operator 2105 @param[out] op_prolong Coarse to fine operator 2106 @param[out] op_restrict Fine to coarse operator 2107 2108 @return An error code: 0 - success, otherwise - failure 2109 2110 @ref User 2111 **/ 2112 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 2113 CeedVector p_mult_fine, 2114 CeedElemRestriction rstr_coarse, 2115 CeedBasis basis_coarse, 2116 const CeedScalar *interp_c_to_f, 2117 CeedOperator *op_coarse, 2118 CeedOperator *op_prolong, 2119 CeedOperator *op_restrict) { 2120 int ierr; 2121 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2122 Ceed ceed; 2123 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2124 2125 // Check for compatible quadrature spaces 2126 CeedBasis basis_fine; 2127 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2128 CeedInt Q_f, Q_c; 2129 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2130 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2131 if (Q_f != Q_c) 2132 // LCOV_EXCL_START 2133 return CeedError(ceed, CEED_ERROR_DIMENSION, 2134 "Bases must have compatible quadrature spaces"); 2135 // LCOV_EXCL_STOP 2136 2137 // Coarse to fine basis 2138 CeedElemTopology topo; 2139 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 2140 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2141 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2142 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2143 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 2144 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2145 CeedChk(ierr); 2146 CeedScalar *q_ref, *q_weight, *grad; 2147 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 2148 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 2149 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 2150 CeedBasis basis_c_to_f; 2151 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 2152 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2153 CeedChk(ierr); 2154 ierr = CeedFree(&q_ref); CeedChk(ierr); 2155 ierr = CeedFree(&q_weight); CeedChk(ierr); 2156 ierr = CeedFree(&grad); CeedChk(ierr); 2157 2158 // Core code 2159 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2160 basis_coarse, basis_c_to_f, op_coarse, 2161 op_prolong, op_restrict); 2162 CeedChk(ierr); 2163 return CEED_ERROR_SUCCESS; 2164 } 2165 2166 /** 2167 @brief Build a FDM based approximate inverse for each element for a 2168 CeedOperator 2169 2170 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2171 Method based approximate inverse. This function obtains the simultaneous 2172 diagonalization for the 1D mass and Laplacian operators, 2173 M = V^T V, K = V^T S V. 2174 The assembled QFunction is used to modify the eigenvalues from simultaneous 2175 diagonalization and obtain an approximate inverse of the form 2176 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2177 associated CeedQFunction must therefore also be linear. 2178 2179 Note: Calling this function asserts that setup is complete 2180 and sets the CeedOperator as immutable. 2181 2182 @param op CeedOperator to create element inverses 2183 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2184 for each element 2185 @param request Address of CeedRequest for non-blocking completion, else 2186 @ref CEED_REQUEST_IMMEDIATE 2187 2188 @return An error code: 0 - success, otherwise - failure 2189 2190 @ref User 2191 **/ 2192 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2193 CeedRequest *request) { 2194 int ierr; 2195 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2196 2197 if (op->CreateFDMElementInverse) { 2198 // Backend version 2199 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2200 return CEED_ERROR_SUCCESS; 2201 } else { 2202 // Operator fallback 2203 CeedOperator op_fallback; 2204 2205 ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); 2206 if (op_fallback) { 2207 ierr = CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request); 2208 CeedChk(ierr); 2209 return CEED_ERROR_SUCCESS; 2210 } 2211 } 2212 2213 // Default interface implementation 2214 Ceed ceed, ceed_parent; 2215 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2216 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2217 ceed_parent = ceed_parent ? ceed_parent : ceed; 2218 CeedQFunction qf; 2219 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2220 2221 // Determine active input basis 2222 bool interp = false, grad = false; 2223 CeedBasis basis = NULL; 2224 CeedElemRestriction rstr = NULL; 2225 CeedOperatorField *op_fields; 2226 CeedQFunctionField *qf_fields; 2227 CeedInt num_input_fields; 2228 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2229 CeedChk(ierr); 2230 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2231 for (CeedInt i=0; i<num_input_fields; i++) { 2232 CeedVector vec; 2233 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2234 if (vec == CEED_VECTOR_ACTIVE) { 2235 CeedEvalMode eval_mode; 2236 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2237 interp = interp || eval_mode == CEED_EVAL_INTERP; 2238 grad = grad || eval_mode == CEED_EVAL_GRAD; 2239 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2240 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2241 } 2242 } 2243 if (!basis) 2244 // LCOV_EXCL_START 2245 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2246 // LCOV_EXCL_STOP 2247 CeedSize l_size = 1; 2248 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2249 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2250 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2251 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2252 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2253 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2254 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2255 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2256 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2257 2258 // Build and diagonalize 1D Mass and Laplacian 2259 bool tensor_basis; 2260 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2261 if (!tensor_basis) 2262 // LCOV_EXCL_START 2263 return CeedError(ceed, CEED_ERROR_BACKEND, 2264 "FDMElementInverse only supported for tensor " 2265 "bases"); 2266 // LCOV_EXCL_STOP 2267 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2268 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2269 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2270 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2271 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2272 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2273 // -- Build matrices 2274 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2275 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2276 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2277 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2278 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2279 mass, laplace); CeedChk(ierr); 2280 2281 // -- Diagonalize 2282 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2283 CeedChk(ierr); 2284 ierr = CeedFree(&mass); CeedChk(ierr); 2285 ierr = CeedFree(&laplace); CeedChk(ierr); 2286 for (CeedInt i=0; i<P_1d; i++) 2287 for (CeedInt j=0; j<P_1d; j++) 2288 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2289 ierr = CeedFree(&x); CeedChk(ierr); 2290 2291 // Assemble QFunction 2292 CeedVector assembled; 2293 CeedElemRestriction rstr_qf; 2294 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2295 &rstr_qf, request); CeedChk(ierr); 2296 CeedInt layout[3]; 2297 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2298 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2299 CeedScalar max_norm = 0; 2300 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2301 2302 // Calculate element averages 2303 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2304 CeedScalar *elem_avg; 2305 const CeedScalar *assembled_array, *q_weight_array; 2306 CeedVector q_weight; 2307 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2308 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2309 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2310 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2311 CeedChk(ierr); 2312 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2313 CeedChk(ierr); 2314 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2315 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2316 for (CeedInt e=0; e<num_elem; e++) { 2317 CeedInt count = 0; 2318 for (CeedInt q=0; q<num_qpts; q++) 2319 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2320 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2321 qf_value_bound) { 2322 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2323 q_weight_array[q]; 2324 count++; 2325 } 2326 if (count) { 2327 elem_avg[e] /= count; 2328 } else { 2329 elem_avg[e] = 1.0; 2330 } 2331 } 2332 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2333 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2334 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2335 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2336 2337 // Build FDM diagonal 2338 CeedVector q_data; 2339 CeedScalar *q_data_array, *fdm_diagonal; 2340 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2341 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2342 for (CeedInt c=0; c<num_comp; c++) 2343 for (CeedInt n=0; n<elem_size; n++) { 2344 if (interp) 2345 fdm_diagonal[c*elem_size + n] = 1.0; 2346 if (grad) 2347 for (CeedInt d=0; d<dim; d++) { 2348 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2349 fdm_diagonal[c*elem_size + n] += lambda[i]; 2350 } 2351 if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound) 2352 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2353 } 2354 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2355 CeedChk(ierr); 2356 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2357 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2358 CeedChk(ierr); 2359 for (CeedInt e=0; e<num_elem; e++) 2360 for (CeedInt c=0; c<num_comp; c++) 2361 for (CeedInt n=0; n<elem_size; n++) 2362 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2363 fdm_diagonal[c*elem_size + n]); 2364 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2365 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2366 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2367 2368 // Setup FDM operator 2369 // -- Basis 2370 CeedBasis fdm_basis; 2371 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2372 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2373 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2374 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2375 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2376 fdm_interp, grad_dummy, q_ref_dummy, 2377 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2378 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2379 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2380 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2381 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2382 ierr = CeedFree(&lambda); CeedChk(ierr); 2383 2384 // -- Restriction 2385 CeedElemRestriction rstr_qd_i; 2386 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2387 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2388 num_comp, num_elem*num_comp*elem_size, 2389 strides, &rstr_qd_i); CeedChk(ierr); 2390 // -- QFunction 2391 CeedQFunction qf_fdm; 2392 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2393 CeedChk(ierr); 2394 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2395 CeedChk(ierr); 2396 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2397 CeedChk(ierr); 2398 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2399 CeedChk(ierr); 2400 ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr); 2401 // -- QFunction context 2402 CeedInt *num_comp_data; 2403 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2404 num_comp_data[0] = num_comp; 2405 CeedQFunctionContext ctx_fdm; 2406 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2407 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2408 sizeof(*num_comp_data), num_comp_data); 2409 CeedChk(ierr); 2410 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2411 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2412 // -- Operator 2413 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2414 CeedChk(ierr); 2415 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2416 CeedChk(ierr); 2417 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2418 q_data); CeedChk(ierr); 2419 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2420 CeedChk(ierr); 2421 2422 // Cleanup 2423 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2424 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2425 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2426 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2427 2428 return CEED_ERROR_SUCCESS; 2429 } 2430 2431 /// @} 2432