1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedOperator preconditioning interfaces 18 19 /// ---------------------------------------------------------------------------- 20 /// CeedOperator Library Internal Preconditioning Functions 21 /// ---------------------------------------------------------------------------- 22 /// @addtogroup CeedOperatorDeveloper 23 /// @{ 24 25 /** 26 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 27 CeedOperator functionality 28 29 @param op CeedOperator to create fallback for 30 31 @return An error code: 0 - success, otherwise - failure 32 33 @ref Developer 34 **/ 35 int CeedOperatorCreateFallback(CeedOperator op) { 36 int ierr; 37 38 // Check not already created 39 if (op->op_fallback) return CEED_ERROR_SUCCESS; 40 41 // Fallback Ceed 42 const char *resource, *fallback_resource; 43 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 44 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 45 CeedChk(ierr); 46 if (!strcmp(resource, fallback_resource)) 47 // LCOV_EXCL_START 48 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 49 "Backend %s cannot create an operator" 50 "fallback to resource %s", resource, fallback_resource); 51 // LCOV_EXCL_STOP 52 53 // Fallback Ceed 54 if (!op->ceed->op_fallback_ceed) { 55 Ceed ceed_ref; 56 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 57 ceed_ref->op_fallback_parent = op->ceed; 58 ceed_ref->Error = op->ceed->Error; 59 op->ceed->op_fallback_ceed = ceed_ref; 60 } 61 62 // Clone 
Op 63 CeedOperator op_fallback; 64 if (op->is_composite) { 65 ierr = CeedCompositeOperatorCreate(op->ceed->op_fallback_ceed, &op_fallback); 66 CeedChk(ierr); 67 for (CeedInt i = 0; i < op->num_suboperators; i++) { 68 ierr = CeedCompositeOperatorAddSub(op_fallback, op->sub_operators[i]); 69 CeedChk(ierr); 70 } 71 } else { 72 ierr = CeedOperatorCreate(op->ceed->op_fallback_ceed, op->qf, op->dqf, op->dqfT, 73 &op_fallback); CeedChk(ierr); 74 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 75 ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, 76 op->input_fields[i]->elem_restr, 77 op->input_fields[i]->basis, 78 op->input_fields[i]->vec); CeedChk(ierr); 79 } 80 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 81 ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, 82 op->output_fields[i]->elem_restr, 83 op->output_fields[i]->basis, 84 op->output_fields[i]->vec); CeedChk(ierr); 85 } 86 ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 87 &op_fallback->qf_assembled); CeedChk(ierr); 88 if (op_fallback->num_qpts == 0) { 89 ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts); 90 CeedChk(ierr); 91 } 92 } 93 ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr); 94 ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr); 95 op->op_fallback = op_fallback; 96 97 return CEED_ERROR_SUCCESS; 98 } 99 100 /** 101 @brief Select correct basis matrix pointer based on CeedEvalMode 102 103 @param[in] eval_mode Current basis evaluation mode 104 @param[in] identity Pointer to identity matrix 105 @param[in] interp Pointer to interpolation matrix 106 @param[in] grad Pointer to gradient matrix 107 @param[out] basis_ptr Basis pointer to set 108 109 @ref Developer 110 **/ 111 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 112 const CeedScalar *identity, const CeedScalar *interp, 113 const CeedScalar *grad, const CeedScalar **basis_ptr) { 114 switch (eval_mode) { 115 
  case CEED_EVAL_NONE:
    // No basis action: use the identity so B^T D B degenerates to D
    *basis_ptr = identity;
    break;
  case CEED_EVAL_INTERP:
    *basis_ptr = interp;
    break;
  case CEED_EVAL_GRAD:
    *basis_ptr = grad;
    break;
  case CEED_EVAL_WEIGHT:
  case CEED_EVAL_DIV:
  case CEED_EVAL_CURL:
    break; // Caught by QF Assembly
  }
}

/**
  @brief Create point block restriction for active operator field

  @param[in]  rstr            Original CeedElemRestriction for active field
  @param[out] pointblock_rstr Address of the variable where the newly created
                                CeedElemRestriction will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedOperatorCreateActivePointBlockRestriction(
  CeedElemRestriction rstr,
  CeedElemRestriction *pointblock_rstr) {
  int ierr;
  Ceed ceed;
  ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
  const CeedInt *offsets;
  ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
  CeedChk(ierr);

  // Expand offsets: each node now addresses a num_comp x num_comp block
  // instead of a single scalar entry
  CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
          *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // NOTE(review): shift appears to map scalar L-vector offsets to block
  // offsets; the extra factor for comp_stride != 1 assumes offsets are in
  // node units for non-interleaved components - confirm against restriction docs
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction; ownership of pointblock_offsets transfers to it
  // (CEED_OWN_POINTER), so no free here
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets,
                                   pointblock_rstr);
  CeedChk(ierr);

  // Cleanup - return the borrowed offsets array to the original restriction
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  @param[in]  op            CeedOperator to assemble point block diagonal
  @param[in]  request       Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction: gives the pointwise D matrix values at quadrature points
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf,
         &rstr, request); CeedChk(ierr);
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm is used below to drop negligible assembled values
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis; GRAD contributes one eval mode per dimension
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction for this assembly path
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis (same expansion rules as inputs)
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector (element-local storage, scattered at the end)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array;
  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices; a synthetic identity stands in for CEED_EVAL_NONE modes
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    // Rectangular identity when num_nodes != num_qpts
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Values below ~100*eps relative to the max norm are treated as zero
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // GRAD modes advance through the per-dimension slices of grad_out
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal: keep all component couplings (c_in, c_out)
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                                                     num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only: c_in == c_out
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                                                   num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);

  // Assemble local operator diagonal: scatter element diagonals into output
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup (diag_rstr is only owned here in the point block case)
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling composite operator diagonal

  @param[in]  op            CeedOperator to assemble point block diagonal
  @param[in]  request       Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in]  is_pointblock Boolean flag to assemble diagonal or point block
diagonal 423 @param[out] assembled CeedVector to store assembled diagonal 424 425 @return An error code: 0 - success, otherwise - failure 426 427 @ref Developer 428 **/ 429 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 430 CeedOperator op, CeedRequest *request, const bool is_pointblock, 431 CeedVector assembled) { 432 int ierr; 433 CeedInt num_sub; 434 CeedOperator *suboperators; 435 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 436 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 437 for (CeedInt i = 0; i < num_sub; i++) { 438 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 439 is_pointblock, assembled); CeedChk(ierr); 440 } 441 return CEED_ERROR_SUCCESS; 442 } 443 444 /** 445 @brief Build nonzero pattern for non-composite operator 446 447 Users should generally use CeedOperatorLinearAssembleSymbolic() 448 449 @param[in] op CeedOperator to assemble nonzero pattern 450 @param[in] offset Offset for number of entries 451 @param[out] rows Row number for each entry 452 @param[out] cols Column number for each entry 453 454 @return An error code: 0 - success, otherwise - failure 455 456 @ref Developer 457 **/ 458 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 459 CeedInt *rows, CeedInt *cols) { 460 int ierr; 461 Ceed ceed = op->ceed; 462 if (op->is_composite) 463 // LCOV_EXCL_START 464 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 465 "Composite operator not supported"); 466 // LCOV_EXCL_STOP 467 468 CeedSize num_nodes; 469 ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr); 470 CeedElemRestriction rstr_in; 471 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 472 CeedInt num_elem, elem_size, num_comp; 473 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 474 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 475 ierr = CeedElemRestrictionGetNumComponents(rstr_in, 
&num_comp); CeedChk(ierr); 476 CeedInt layout_er[3]; 477 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 478 479 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 480 481 // Determine elem_dof relation 482 CeedVector index_vec; 483 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 484 CeedScalar *array; 485 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 486 for (CeedInt i = 0; i < num_nodes; ++i) { 487 array[i] = i; 488 } 489 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 490 CeedVector elem_dof; 491 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 492 CeedChk(ierr); 493 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 494 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 495 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 496 const CeedScalar *elem_dof_a; 497 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 498 CeedChk(ierr); 499 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 500 501 // Determine i, j locations for element matrices 502 CeedInt count = 0; 503 for (CeedInt e = 0; e < num_elem; ++e) { 504 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 505 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 506 for (CeedInt i = 0; i < elem_size; ++i) { 507 for (CeedInt j = 0; j < elem_size; ++j) { 508 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 509 (comp_out)*layout_er[1] + e*layout_er[2]; 510 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 511 (comp_in)*layout_er[1] + e*layout_er[2]; 512 513 const CeedInt row = elem_dof_a[elem_dof_index_row]; 514 const CeedInt col = elem_dof_a[elem_dof_index_col]; 515 516 rows[offset + count] = row; 517 cols[offset + count] = col; 518 count++; 519 } 520 } 521 } 522 } 523 } 524 if (count != local_num_entries) 525 // LCOV_EXCL_START 526 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing 
assembled entries"); 527 // LCOV_EXCL_STOP 528 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 529 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 530 531 return CEED_ERROR_SUCCESS; 532 } 533 534 /** 535 @brief Assemble nonzero entries for non-composite operator 536 537 Users should generally use CeedOperatorLinearAssemble() 538 539 @param[in] op CeedOperator to assemble 540 @param[out] offset Offest for number of entries 541 @param[out] values Values to assemble into matrix 542 543 @return An error code: 0 - success, otherwise - failure 544 545 @ref Developer 546 **/ 547 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 548 CeedVector values) { 549 int ierr; 550 Ceed ceed = op->ceed; 551 if (op->is_composite) 552 // LCOV_EXCL_START 553 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 554 "Composite operator not supported"); 555 // LCOV_EXCL_STOP 556 557 // Assemble QFunction 558 CeedQFunction qf; 559 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 560 CeedVector assembled_qf; 561 CeedElemRestriction rstr_q; 562 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 563 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 564 565 CeedSize qf_length; 566 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 567 568 CeedInt num_input_fields, num_output_fields; 569 CeedOperatorField *input_fields; 570 CeedOperatorField *output_fields; 571 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 572 &num_output_fields, &output_fields); CeedChk(ierr); 573 574 // Determine active input basis 575 CeedQFunctionField *qf_fields; 576 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 577 CeedInt num_eval_mode_in = 0, dim = 1; 578 CeedEvalMode *eval_mode_in = NULL; 579 CeedBasis basis_in = NULL; 580 CeedElemRestriction rstr_in = NULL; 581 for (CeedInt i=0; i<num_input_fields; i++) { 582 CeedVector vec; 583 ierr = 
CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 584 if (vec == CEED_VECTOR_ACTIVE) { 585 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 586 CeedChk(ierr); 587 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 588 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 589 CeedChk(ierr); 590 CeedEvalMode eval_mode; 591 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 592 CeedChk(ierr); 593 switch (eval_mode) { 594 case CEED_EVAL_NONE: 595 case CEED_EVAL_INTERP: 596 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 597 eval_mode_in[num_eval_mode_in] = eval_mode; 598 num_eval_mode_in += 1; 599 break; 600 case CEED_EVAL_GRAD: 601 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 602 for (CeedInt d=0; d<dim; d++) { 603 eval_mode_in[num_eval_mode_in+d] = eval_mode; 604 } 605 num_eval_mode_in += dim; 606 break; 607 case CEED_EVAL_WEIGHT: 608 case CEED_EVAL_DIV: 609 case CEED_EVAL_CURL: 610 break; // Caught by QF Assembly 611 } 612 } 613 } 614 615 // Determine active output basis 616 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 617 CeedInt num_eval_mode_out = 0; 618 CeedEvalMode *eval_mode_out = NULL; 619 CeedBasis basis_out = NULL; 620 CeedElemRestriction rstr_out = NULL; 621 for (CeedInt i=0; i<num_output_fields; i++) { 622 CeedVector vec; 623 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 624 if (vec == CEED_VECTOR_ACTIVE) { 625 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 626 CeedChk(ierr); 627 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 628 CeedChk(ierr); 629 CeedEvalMode eval_mode; 630 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 631 CeedChk(ierr); 632 switch (eval_mode) { 633 case CEED_EVAL_NONE: 634 case CEED_EVAL_INTERP: 635 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 636 
eval_mode_out[num_eval_mode_out] = eval_mode; 637 num_eval_mode_out += 1; 638 break; 639 case CEED_EVAL_GRAD: 640 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 641 for (CeedInt d=0; d<dim; d++) { 642 eval_mode_out[num_eval_mode_out+d] = eval_mode; 643 } 644 num_eval_mode_out += dim; 645 break; 646 case CEED_EVAL_WEIGHT: 647 case CEED_EVAL_DIV: 648 case CEED_EVAL_CURL: 649 break; // Caught by QF Assembly 650 } 651 } 652 } 653 654 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 655 // LCOV_EXCL_START 656 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 657 "Cannot assemble operator with out inputs/outputs"); 658 // LCOV_EXCL_STOP 659 660 CeedInt num_elem, elem_size, num_qpts, num_comp; 661 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 662 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 663 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 664 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 665 666 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 667 668 // loop over elements and put in data structure 669 const CeedScalar *interp_in, *grad_in; 670 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 671 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 672 673 const CeedScalar *assembled_qf_array; 674 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 675 CeedChk(ierr); 676 677 CeedInt layout_qf[3]; 678 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 679 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 680 681 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 682 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 683 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 684 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 685 num_qpts]; // logically 3-tensor 
686 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 687 CeedScalar elem_mat[elem_size * elem_size]; 688 CeedInt count = 0; 689 CeedScalar *vals; 690 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 691 for (CeedInt e = 0; e < num_elem; ++e) { 692 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 693 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 694 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 695 B_mat_in[ell] = 0.0; 696 } 697 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 698 B_mat_out[ell] = 0.0; 699 } 700 // Store block-diagonal D matrix as collection of small dense blocks 701 for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 702 D_mat[ell] = 0.0; 703 } 704 // form element matrix itself (for each block component) 705 for (CeedInt ell = 0; ell < elem_size*elem_size; ++ell) { 706 elem_mat[ell] = 0.0; 707 } 708 for (CeedInt q = 0; q < num_qpts; ++q) { 709 for (CeedInt n = 0; n < elem_size; ++n) { 710 CeedInt d_in = -1; 711 for (CeedInt e_in = 0; e_in < num_eval_mode_in; ++e_in) { 712 const CeedInt qq = num_eval_mode_in*q; 713 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 714 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 715 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 716 d_in += 1; 717 B_mat_in[(qq+e_in)*elem_size + n] += 718 grad_in[(d_in*num_qpts+q) * elem_size + n]; 719 } else { 720 // LCOV_EXCL_START 721 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 722 // LCOV_EXCL_STOP 723 } 724 } 725 CeedInt d_out = -1; 726 for (CeedInt e_out = 0; e_out < num_eval_mode_out; ++e_out) { 727 const CeedInt qq = num_eval_mode_out*q; 728 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 729 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 730 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 731 d_out += 1; 732 B_mat_out[(qq+e_out)*elem_size + n] += 733 
grad_in[(d_out*num_qpts+q) * elem_size + n]; 734 } else { 735 // LCOV_EXCL_START 736 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 737 // LCOV_EXCL_STOP 738 } 739 } 740 } 741 for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) { 742 for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) { 743 const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 744 +comp_out; 745 const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 746 e*layout_qf[2]; 747 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 748 } 749 } 750 } 751 // Compute B^T*D 752 for (CeedInt ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 753 BTD[ell] = 0.0; 754 } 755 for (CeedInt j = 0; j<elem_size; ++j) { 756 for (CeedInt q = 0; q<num_qpts; ++q) { 757 const CeedInt qq = num_eval_mode_out*q; 758 for (CeedInt ei = 0; ei < num_eval_mode_in; ++ei) { 759 for (CeedInt ej = 0; ej < num_eval_mode_out; ++ej) { 760 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 761 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 762 } 763 } 764 } 765 } 766 767 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 768 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 769 770 // put element matrix in coordinate data structure 771 for (CeedInt i = 0; i < elem_size; ++i) { 772 for (CeedInt j = 0; j < elem_size; ++j) { 773 vals[offset + count] = elem_mat[i*elem_size + j]; 774 count++; 775 } 776 } 777 } 778 } 779 } 780 if (count != local_num_entries) 781 // LCOV_EXCL_START 782 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 783 // LCOV_EXCL_STOP 784 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 785 786 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 787 CeedChk(ierr); 788 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 789 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 790 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 791 792 
return CEED_ERROR_SUCCESS; 793 } 794 795 /** 796 @brief Count number of entries for assembled CeedOperator 797 798 @param[in] op CeedOperator to assemble 799 @param[out] num_entries Number of entries in assembled representation 800 801 @return An error code: 0 - success, otherwise - failure 802 803 @ref Utility 804 **/ 805 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 806 CeedInt *num_entries) { 807 int ierr; 808 CeedElemRestriction rstr; 809 CeedInt num_elem, elem_size, num_comp; 810 811 if (op->is_composite) 812 // LCOV_EXCL_START 813 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 814 "Composite operator not supported"); 815 // LCOV_EXCL_STOP 816 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 817 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 818 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 819 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 820 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 821 822 return CEED_ERROR_SUCCESS; 823 } 824 825 /** 826 @brief Common code for creating a multigrid coarse operator and level 827 transfer operators for a CeedOperator 828 829 @param[in] op_fine Fine grid operator 830 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 831 @param[in] rstr_coarse Coarse grid restriction 832 @param[in] basis_coarse Coarse grid active vector basis 833 @param[in] basis_c_to_f Basis for coarse to fine interpolation 834 @param[out] op_coarse Coarse grid operator 835 @param[out] op_prolong Coarse to fine operator 836 @param[out] op_restrict Fine to coarse operator 837 838 @return An error code: 0 - success, otherwise - failure 839 840 @ref Developer 841 **/ 842 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 843 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 844 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator 
*op_prolong, 845 CeedOperator *op_restrict) { 846 int ierr; 847 Ceed ceed; 848 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 849 850 // Check for composite operator 851 bool is_composite; 852 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 853 if (is_composite) 854 // LCOV_EXCL_START 855 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 856 "Automatic multigrid setup for composite operators not supported"); 857 // LCOV_EXCL_STOP 858 859 // Coarse Grid 860 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 861 op_coarse); CeedChk(ierr); 862 CeedElemRestriction rstr_fine = NULL; 863 // -- Clone input fields 864 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 865 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 866 rstr_fine = op_fine->input_fields[i]->elem_restr; 867 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 868 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 869 CeedChk(ierr); 870 } else { 871 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 872 op_fine->input_fields[i]->elem_restr, 873 op_fine->input_fields[i]->basis, 874 op_fine->input_fields[i]->vec); CeedChk(ierr); 875 } 876 } 877 // -- Clone output fields 878 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 879 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 880 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 881 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 882 CeedChk(ierr); 883 } else { 884 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 885 op_fine->output_fields[i]->elem_restr, 886 op_fine->output_fields[i]->basis, 887 op_fine->output_fields[i]->vec); CeedChk(ierr); 888 } 889 } 890 // -- Clone QFunctionAssemblyData 891 ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, 892 &(*op_coarse)->qf_assembled); CeedChk(ierr); 893 894 // Multiplicity vector 895 
CeedVector mult_vec, mult_e_vec; 896 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 897 CeedChk(ierr); 898 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 899 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 900 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 901 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 902 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 903 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 904 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 905 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 906 907 // Restriction 908 CeedInt num_comp; 909 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr); 910 CeedQFunction qf_restrict; 911 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 912 CeedChk(ierr); 913 CeedInt *num_comp_r_data; 914 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 915 num_comp_r_data[0] = num_comp; 916 CeedQFunctionContext ctx_r; 917 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 918 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 919 sizeof(*num_comp_r_data), num_comp_r_data); 920 CeedChk(ierr); 921 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 922 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 923 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 924 CeedChk(ierr); 925 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 926 CeedChk(ierr); 927 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 928 CEED_EVAL_INTERP); CeedChk(ierr); 929 ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr); 930 931 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 932 CEED_QFUNCTION_NONE, op_restrict); 933 CeedChk(ierr); 934 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 935 
CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 936 CeedChk(ierr); 937 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 938 CEED_BASIS_COLLOCATED, mult_vec); 939 CeedChk(ierr); 940 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 941 CEED_VECTOR_ACTIVE); CeedChk(ierr); 942 943 // Prolongation 944 CeedQFunction qf_prolong; 945 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 946 CeedChk(ierr); 947 CeedInt *num_comp_p_data; 948 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 949 num_comp_p_data[0] = num_comp; 950 CeedQFunctionContext ctx_p; 951 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 952 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 953 sizeof(*num_comp_p_data), num_comp_p_data); 954 CeedChk(ierr); 955 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 956 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 957 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 958 CeedChk(ierr); 959 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 960 CeedChk(ierr); 961 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 962 CeedChk(ierr); 963 ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr); 964 965 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 966 CEED_QFUNCTION_NONE, op_prolong); 967 CeedChk(ierr); 968 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 969 CEED_VECTOR_ACTIVE); CeedChk(ierr); 970 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 971 CEED_BASIS_COLLOCATED, mult_vec); 972 CeedChk(ierr); 973 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 974 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 975 CeedChk(ierr); 976 977 // Clone name 978 bool has_name = op_fine->name; 979 size_t name_len = op_fine->name ? 
strlen(op_fine->name) : 0; 980 ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr); 981 { 982 char *prolongation_name; 983 ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr); 984 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", 985 op_fine->name); 986 ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr); 987 ierr = CeedFree(&prolongation_name); CeedChk(ierr); 988 } 989 { 990 char *restriction_name; 991 ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr); 992 sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", 993 op_fine->name); 994 ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr); 995 ierr = CeedFree(&restriction_name); CeedChk(ierr); 996 } 997 998 // Cleanup 999 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 1000 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 1001 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 1002 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 1003 1004 return CEED_ERROR_SUCCESS; 1005 } 1006 1007 /** 1008 @brief Build 1D mass matrix and Laplacian with perturbation 1009 1010 @param[in] interp_1d Interpolation matrix in one dimension 1011 @param[in] grad_1d Gradient matrix in one dimension 1012 @param[in] q_weight_1d Quadrature weights in one dimension 1013 @param[in] P_1d Number of basis nodes in one dimension 1014 @param[in] Q_1d Number of quadrature points in one dimension 1015 @param[in] dim Dimension of basis 1016 @param[out] mass Assembled mass matrix in one dimension 1017 @param[out] laplace Assembled perturbed Laplacian in one dimension 1018 1019 @return An error code: 0 - success, otherwise - failure 1020 1021 @ref Developer 1022 **/ 1023 CeedPragmaOptimizeOff 1024 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 1025 const CeedScalar *grad_1d, 1026 const CeedScalar *q_weight_1d, CeedInt P_1d, 1027 CeedInt Q_1d, CeedInt dim, 1028 CeedScalar *mass, CeedScalar 
*laplace) { 1029 1030 for (CeedInt i=0; i<P_1d; i++) 1031 for (CeedInt j=0; j<P_1d; j++) { 1032 CeedScalar sum = 0.0; 1033 for (CeedInt k=0; k<Q_1d; k++) 1034 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 1035 mass[i+j*P_1d] = sum; 1036 } 1037 // -- Laplacian 1038 for (CeedInt i=0; i<P_1d; i++) 1039 for (CeedInt j=0; j<P_1d; j++) { 1040 CeedScalar sum = 0.0; 1041 for (CeedInt k=0; k<Q_1d; k++) 1042 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1043 laplace[i+j*P_1d] = sum; 1044 } 1045 CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4; 1046 for (CeedInt i=0; i<P_1d; i++) 1047 laplace[i+P_1d*i] += perturbation; 1048 return CEED_ERROR_SUCCESS; 1049 } 1050 CeedPragmaOptimizeOn 1051 1052 /// @} 1053 1054 /// ---------------------------------------------------------------------------- 1055 /// CeedOperator Backend API 1056 /// ---------------------------------------------------------------------------- 1057 /// @addtogroup CeedOperatorBackend 1058 /// @{ 1059 1060 /** 1061 @brief Create object holding CeedQFunction assembly data for CeedOperator 1062 1063 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1064 @param[out] data Address of the variable where the newly created 1065 CeedQFunctionAssemblyData will be stored 1066 1067 @return An error code: 0 - success, otherwise - failure 1068 1069 @ref Backend 1070 **/ 1071 int CeedQFunctionAssemblyDataCreate(Ceed ceed, 1072 CeedQFunctionAssemblyData *data) { 1073 int ierr; 1074 1075 ierr = CeedCalloc(1, data); CeedChk(ierr); 1076 (*data)->ref_count = 1; 1077 (*data)->ceed = ceed; 1078 ierr = CeedReference(ceed); CeedChk(ierr); 1079 1080 return CEED_ERROR_SUCCESS; 1081 } 1082 1083 /** 1084 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1085 1086 @param data CeedQFunctionAssemblyData to increment the reference counter 1087 1088 @return An error code: 0 - success, otherwise - failure 1089 1090 @ref Backend 1091 **/ 1092 int 
CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1093 data->ref_count++; 1094 return CEED_ERROR_SUCCESS; 1095 } 1096 1097 /** 1098 @brief Set re-use of CeedQFunctionAssemblyData 1099 1100 @param data CeedQFunctionAssemblyData to mark for reuse 1101 @param reuse_data Boolean flag indicating data re-use 1102 1103 @return An error code: 0 - success, otherwise - failure 1104 1105 @ref Backend 1106 **/ 1107 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, 1108 bool reuse_data) { 1109 data->reuse_data = reuse_data; 1110 data->needs_data_update = true; 1111 return CEED_ERROR_SUCCESS; 1112 } 1113 1114 /** 1115 @brief Mark QFunctionAssemblyData as stale 1116 1117 @param data CeedQFunctionAssemblyData to mark as stale 1118 @param needs_data_update Boolean flag indicating if update is needed or completed 1119 1120 @return An error code: 0 - success, otherwise - failure 1121 1122 @ref Backend 1123 **/ 1124 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, 1125 bool needs_data_update) { 1126 data->needs_data_update = needs_data_update; 1127 return CEED_ERROR_SUCCESS; 1128 } 1129 1130 /** 1131 @brief Determine if QFunctionAssemblyData needs update 1132 1133 @param[in] data CeedQFunctionAssemblyData to mark as stale 1134 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1135 1136 @return An error code: 0 - success, otherwise - failure 1137 1138 @ref Backend 1139 **/ 1140 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, 1141 bool *is_update_needed) { 1142 *is_update_needed = !data->reuse_data || data->needs_data_update; 1143 return CEED_ERROR_SUCCESS; 1144 } 1145 1146 /** 1147 @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should 1148 be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`; 1149 Note: If `*data_copy` is non-NULL, then it is assumed that 1150 `*data_copy` is a pointer to a CeedQFunctionAssemblyData. 
This 1151 CeedQFunctionAssemblyData will be destroyed if `*data_copy` is 1152 the only reference to this CeedQFunctionAssemblyData. 1153 1154 @param data CeedQFunctionAssemblyData to copy reference to 1155 @param[out] data_copy Variable to store copied reference 1156 1157 @return An error code: 0 - success, otherwise - failure 1158 1159 @ref Backend 1160 **/ 1161 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, 1162 CeedQFunctionAssemblyData *data_copy) { 1163 int ierr; 1164 1165 ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr); 1166 ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr); 1167 *data_copy = data; 1168 return CEED_ERROR_SUCCESS; 1169 } 1170 1171 /** 1172 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1173 1174 @param[in] data CeedQFunctionAssemblyData to retreive status 1175 @param[out] is_setup Boolean flag for setup status 1176 1177 @return An error code: 0 - success, otherwise - failure 1178 1179 @ref Backend 1180 **/ 1181 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, 1182 bool *is_setup) { 1183 *is_setup = data->is_setup; 1184 return CEED_ERROR_SUCCESS; 1185 } 1186 1187 /** 1188 @brief Set internal objects for CeedQFunctionAssemblyData 1189 1190 @param[in] data CeedQFunctionAssemblyData to set objects 1191 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1192 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1193 1194 @return An error code: 0 - success, otherwise - failure 1195 1196 @ref Backend 1197 **/ 1198 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, 1199 CeedVector vec, CeedElemRestriction rstr) { 1200 int ierr; 1201 1202 ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr); 1203 ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr); 1204 1205 data->is_setup = true; 1206 return CEED_ERROR_SUCCESS; 1207 } 1208 1209 int 
CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, 1210 CeedVector *vec, CeedElemRestriction *rstr) { 1211 int ierr; 1212 1213 if (!data->is_setup) 1214 // LCOV_EXCL_START 1215 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, 1216 "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1217 // LCOV_EXCL_STOP 1218 1219 ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr); 1220 ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr); 1221 1222 return CEED_ERROR_SUCCESS; 1223 } 1224 1225 /** 1226 @brief Destroy CeedQFunctionAssemblyData 1227 1228 @param[out] data CeedQFunctionAssemblyData to destroy 1229 1230 @return An error code: 0 - success, otherwise - failure 1231 1232 @ref Backend 1233 **/ 1234 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1235 int ierr; 1236 1237 if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS; 1238 1239 ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr); 1240 ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr); 1241 ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr); 1242 1243 ierr = CeedFree(data); CeedChk(ierr); 1244 return CEED_ERROR_SUCCESS; 1245 } 1246 1247 /// @} 1248 1249 /// ---------------------------------------------------------------------------- 1250 /// CeedOperator Public API 1251 /// ---------------------------------------------------------------------------- 1252 /// @addtogroup CeedOperatorUser 1253 /// @{ 1254 1255 /** 1256 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1257 1258 This returns a CeedVector containing a matrix at each quadrature point 1259 providing the action of the CeedQFunction associated with the CeedOperator. 
1260 The vector 'assembled' is of shape 1261 [num_elements, num_input_fields, num_output_fields, num_quad_points] 1262 and contains column-major matrices representing the action of the 1263 CeedQFunction for a corresponding quadrature point on an element. Inputs and 1264 outputs are in the order provided by the user when adding CeedOperator fields. 1265 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1266 'v', provided in that order, would result in an assembled QFunction that 1267 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1268 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1269 1270 Note: Calling this function asserts that setup is complete 1271 and sets the CeedOperator as immutable. 1272 1273 @param op CeedOperator to assemble CeedQFunction 1274 @param[out] assembled CeedVector to store assembled CeedQFunction at 1275 quadrature points 1276 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1277 CeedQFunction 1278 @param request Address of CeedRequest for non-blocking completion, else 1279 @ref CEED_REQUEST_IMMEDIATE 1280 1281 @return An error code: 0 - success, otherwise - failure 1282 1283 @ref User 1284 **/ 1285 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1286 CeedElemRestriction *rstr, 1287 CeedRequest *request) { 1288 int ierr; 1289 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1290 1291 // Backend version 1292 if (op->LinearAssembleQFunction) { 1293 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1294 CeedChk(ierr); 1295 } else { 1296 // Fallback to reference Ceed 1297 if (!op->op_fallback) { 1298 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1299 } 1300 // Assemble 1301 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1302 rstr, request); CeedChk(ierr); 1303 } 1304 return CEED_ERROR_SUCCESS; 1305 } 1306 1307 /** 1308 @brief Assemble CeedQFunction and store 
result internall. Return copied 1309 references of stored data to the caller. Caller is responsible for 1310 ownership and destruction of the copied references. See also 1311 @ref CeedOperatorLinearAssembleQFunction 1312 1313 @param op CeedOperator to assemble CeedQFunction 1314 @param assembled CeedVector to store assembled CeedQFunction at 1315 quadrature points 1316 @param rstr CeedElemRestriction for CeedVector containing assembled 1317 CeedQFunction 1318 @param request Address of CeedRequest for non-blocking completion, else 1319 @ref CEED_REQUEST_IMMEDIATE 1320 1321 @return An error code: 0 - success, otherwise - failure 1322 1323 @ref User 1324 **/ 1325 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, 1326 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1327 int ierr; 1328 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1329 1330 // Backend version 1331 if (op->LinearAssembleQFunctionUpdate) { 1332 bool qf_assembled_is_setup; 1333 CeedVector assembled_vec = NULL; 1334 CeedElemRestriction assembled_rstr = NULL; 1335 1336 ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, 1337 &qf_assembled_is_setup); CeedChk(ierr); 1338 if (qf_assembled_is_setup) { 1339 ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, 1340 &assembled_rstr); CeedChk(ierr); 1341 1342 bool update_needed; 1343 ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, 1344 &update_needed); CeedChk(ierr); 1345 if (update_needed) { 1346 ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, 1347 request); CeedChk(ierr); 1348 } 1349 } else { 1350 ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, 1351 request); CeedChk(ierr); 1352 ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, 1353 assembled_rstr); CeedChk(ierr); 1354 } 1355 ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false); 1356 CeedChk(ierr); 1357 1358 // Copy 
reference to internally held copy 1359 *assembled = NULL; 1360 *rstr = NULL; 1361 ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr); 1362 ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr); 1363 ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr); 1364 ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr); 1365 } else { 1366 // Fallback to reference Ceed 1367 if (!op->op_fallback) { 1368 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1369 } 1370 // Assemble 1371 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback, 1372 assembled, rstr, request); CeedChk(ierr); 1373 } 1374 1375 return CEED_ERROR_SUCCESS; 1376 } 1377 1378 /** 1379 @brief Assemble the diagonal of a square linear CeedOperator 1380 1381 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1382 1383 Note: Currently only non-composite CeedOperators with a single field and 1384 composite CeedOperators with single field sub-operators are supported. 1385 1386 Note: Calling this function asserts that setup is complete 1387 and sets the CeedOperator as immutable. 
1388 1389 @param op CeedOperator to assemble CeedQFunction 1390 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1391 @param request Address of CeedRequest for non-blocking completion, else 1392 @ref CEED_REQUEST_IMMEDIATE 1393 1394 @return An error code: 0 - success, otherwise - failure 1395 1396 @ref User 1397 **/ 1398 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, 1399 CeedRequest *request) { 1400 int ierr; 1401 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1402 1403 CeedSize input_size = 0, output_size = 0; 1404 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1405 CeedChk(ierr); 1406 if (input_size != output_size) 1407 // LCOV_EXCL_START 1408 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1409 // LCOV_EXCL_STOP 1410 1411 // Use backend version, if available 1412 if (op->LinearAssembleDiagonal) { 1413 ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); 1414 return CEED_ERROR_SUCCESS; 1415 } else if (op->LinearAssembleAddDiagonal) { 1416 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1417 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1418 return CEED_ERROR_SUCCESS; 1419 } else { 1420 // Check for valid fallback resource 1421 const char *resource, *fallback_resource; 1422 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1423 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1424 CeedChk(ierr); 1425 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1426 // Fallback to reference Ceed 1427 if (!op->op_fallback) { 1428 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1429 } 1430 // Assemble 1431 ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request); 1432 CeedChk(ierr); 1433 return CEED_ERROR_SUCCESS; 1434 } 1435 } 1436 1437 // Default interface implementation 1438 ierr = CeedVectorSetValue(assembled, 0.0); 
CeedChk(ierr); 1439 ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); 1440 CeedChk(ierr); 1441 return CEED_ERROR_SUCCESS; 1442 } 1443 1444 /** 1445 @brief Assemble the diagonal of a square linear CeedOperator 1446 1447 This sums into a CeedVector the diagonal of a linear CeedOperator. 1448 1449 Note: Currently only non-composite CeedOperators with a single field and 1450 composite CeedOperators with single field sub-operators are supported. 1451 1452 Note: Calling this function asserts that setup is complete 1453 and sets the CeedOperator as immutable. 1454 1455 @param op CeedOperator to assemble CeedQFunction 1456 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1457 @param request Address of CeedRequest for non-blocking completion, else 1458 @ref CEED_REQUEST_IMMEDIATE 1459 1460 @return An error code: 0 - success, otherwise - failure 1461 1462 @ref User 1463 **/ 1464 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1465 CeedRequest *request) { 1466 int ierr; 1467 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1468 1469 CeedSize input_size = 0, output_size = 0; 1470 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1471 CeedChk(ierr); 1472 if (input_size != output_size) 1473 // LCOV_EXCL_START 1474 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1475 // LCOV_EXCL_STOP 1476 1477 // Use backend version, if available 1478 if (op->LinearAssembleAddDiagonal) { 1479 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1480 return CEED_ERROR_SUCCESS; 1481 } else { 1482 // Check for valid fallback resource 1483 const char *resource, *fallback_resource; 1484 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1485 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1486 CeedChk(ierr); 1487 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1488 // Fallback to reference 
Ceed 1489 if (!op->op_fallback) { 1490 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1491 } 1492 // Assemble 1493 ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, 1494 request); CeedChk(ierr); 1495 return CEED_ERROR_SUCCESS; 1496 } 1497 } 1498 1499 // Default interface implementation 1500 bool is_composite; 1501 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1502 if (is_composite) { 1503 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1504 false, assembled); CeedChk(ierr); 1505 return CEED_ERROR_SUCCESS; 1506 } else { 1507 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1508 CeedChk(ierr); 1509 return CEED_ERROR_SUCCESS; 1510 } 1511 } 1512 1513 /** 1514 @brief Assemble the point block diagonal of a square linear CeedOperator 1515 1516 This overwrites a CeedVector with the point block diagonal of a linear 1517 CeedOperator. 1518 1519 Note: Currently only non-composite CeedOperators with a single field and 1520 composite CeedOperators with single field sub-operators are supported. 1521 1522 Note: Calling this function asserts that setup is complete 1523 and sets the CeedOperator as immutable. 1524 1525 @param op CeedOperator to assemble CeedQFunction 1526 @param[out] assembled CeedVector to store assembled CeedOperator point block 1527 diagonal, provided in row-major form with an 1528 @a num_comp * @a num_comp block at each node. The dimensions 1529 of this vector are derived from the active vector 1530 for the CeedOperator. The array has shape 1531 [nodes, component out, component in]. 
1532 @param request Address of CeedRequest for non-blocking completion, else 1533 CEED_REQUEST_IMMEDIATE 1534 1535 @return An error code: 0 - success, otherwise - failure 1536 1537 @ref User 1538 **/ 1539 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, 1540 CeedVector assembled, CeedRequest *request) { 1541 int ierr; 1542 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1543 1544 CeedSize input_size = 0, output_size = 0; 1545 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1546 CeedChk(ierr); 1547 if (input_size != output_size) 1548 // LCOV_EXCL_START 1549 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1550 // LCOV_EXCL_STOP 1551 1552 // Use backend version, if available 1553 if (op->LinearAssemblePointBlockDiagonal) { 1554 ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); 1555 CeedChk(ierr); 1556 return CEED_ERROR_SUCCESS; 1557 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1558 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1559 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, 1560 request); CeedChk(ierr); 1561 return CEED_ERROR_SUCCESS; 1562 } else { 1563 // Check for valid fallback resource 1564 const char *resource, *fallback_resource; 1565 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1566 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1567 CeedChk(ierr); 1568 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1569 // Fallback to reference Ceed 1570 if (!op->op_fallback) { 1571 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1572 } 1573 // Assemble 1574 ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback, 1575 assembled, request); CeedChk(ierr); 1576 return CEED_ERROR_SUCCESS; 1577 } 1578 } 1579 1580 // Default interface implementation 1581 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1582 ierr = 
CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); 1583 CeedChk(ierr); 1584 return CEED_ERROR_SUCCESS; 1585 } 1586 1587 /** 1588 @brief Assemble the point block diagonal of a square linear CeedOperator 1589 1590 This sums into a CeedVector with the point block diagonal of a linear 1591 CeedOperator. 1592 1593 Note: Currently only non-composite CeedOperators with a single field and 1594 composite CeedOperators with single field sub-operators are supported. 1595 1596 Note: Calling this function asserts that setup is complete 1597 and sets the CeedOperator as immutable. 1598 1599 @param op CeedOperator to assemble CeedQFunction 1600 @param[out] assembled CeedVector to store assembled CeedOperator point block 1601 diagonal, provided in row-major form with an 1602 @a num_comp * @a num_comp block at each node. The dimensions 1603 of this vector are derived from the active vector 1604 for the CeedOperator. The array has shape 1605 [nodes, component out, component in]. 1606 @param request Address of CeedRequest for non-blocking completion, else 1607 CEED_REQUEST_IMMEDIATE 1608 1609 @return An error code: 0 - success, otherwise - failure 1610 1611 @ref User 1612 **/ 1613 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, 1614 CeedVector assembled, CeedRequest *request) { 1615 int ierr; 1616 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1617 1618 CeedSize input_size = 0, output_size = 0; 1619 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1620 CeedChk(ierr); 1621 if (input_size != output_size) 1622 // LCOV_EXCL_START 1623 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1624 // LCOV_EXCL_STOP 1625 1626 // Use backend version, if available 1627 if (op->LinearAssembleAddPointBlockDiagonal) { 1628 ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); 1629 CeedChk(ierr); 1630 return CEED_ERROR_SUCCESS; 1631 } else { 1632 // Check for valid fallback resource 1633 
const char *resource, *fallback_resource; 1634 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1635 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1636 CeedChk(ierr); 1637 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1638 // Fallback to reference Ceed 1639 if (!op->op_fallback) { 1640 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1641 } 1642 // Assemble 1643 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1644 assembled, request); CeedChk(ierr); 1645 return CEED_ERROR_SUCCESS; 1646 } 1647 } 1648 1649 // Default interface implemenation 1650 bool is_composite; 1651 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1652 if (is_composite) { 1653 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1654 true, assembled); CeedChk(ierr); 1655 return CEED_ERROR_SUCCESS; 1656 } else { 1657 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1658 CeedChk(ierr); 1659 return CEED_ERROR_SUCCESS; 1660 } 1661 } 1662 1663 /** 1664 @brief Fully assemble the nonzero pattern of a linear operator. 1665 1666 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1667 1668 The assembly routines use coordinate format, with num_entries tuples of the 1669 form (i, j, value) which indicate that value should be added to the matrix 1670 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1671 This function returns the number of entries and their (i, j) locations, 1672 while CeedOperatorLinearAssemble() provides the values in the same 1673 ordering. 1674 1675 This will generally be slow unless your operator is low-order. 1676 1677 Note: Calling this function asserts that setup is complete 1678 and sets the CeedOperator as immutable. 
1679 1680 @param[in] op CeedOperator to assemble 1681 @param[out] num_entries Number of entries in coordinate nonzero pattern 1682 @param[out] rows Row number for each entry 1683 @param[out] cols Column number for each entry 1684 1685 @ref User 1686 **/ 1687 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, 1688 CeedInt **rows, CeedInt **cols) { 1689 int ierr; 1690 CeedInt num_suboperators, single_entries; 1691 CeedOperator *sub_operators; 1692 bool is_composite; 1693 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1694 1695 // Use backend version, if available 1696 if (op->LinearAssembleSymbolic) { 1697 ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr); 1698 return CEED_ERROR_SUCCESS; 1699 } else { 1700 // Check for valid fallback resource 1701 const char *resource, *fallback_resource; 1702 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1703 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1704 CeedChk(ierr); 1705 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1706 // Fallback to reference Ceed 1707 if (!op->op_fallback) { 1708 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1709 } 1710 // Assemble 1711 ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows, 1712 cols); CeedChk(ierr); 1713 return CEED_ERROR_SUCCESS; 1714 } 1715 } 1716 1717 // Default interface implementation 1718 1719 // count entries and allocate rows, cols arrays 1720 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1721 *num_entries = 0; 1722 if (is_composite) { 1723 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1724 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1725 for (CeedInt k = 0; k < num_suboperators; ++k) { 1726 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1727 &single_entries); CeedChk(ierr); 1728 *num_entries += single_entries; 1729 } 1730 } else { 1731 ierr = 
CeedSingleOperatorAssemblyCountEntries(op, 1732 &single_entries); CeedChk(ierr); 1733 *num_entries += single_entries; 1734 } 1735 ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr); 1736 ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr); 1737 1738 // assemble nonzero locations 1739 CeedInt offset = 0; 1740 if (is_composite) { 1741 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1742 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1743 for (CeedInt k = 0; k < num_suboperators; ++k) { 1744 ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, 1745 *cols); CeedChk(ierr); 1746 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1747 &single_entries); 1748 CeedChk(ierr); 1749 offset += single_entries; 1750 } 1751 } else { 1752 ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols); 1753 CeedChk(ierr); 1754 } 1755 1756 return CEED_ERROR_SUCCESS; 1757 } 1758 1759 /** 1760 @brief Fully assemble the nonzero entries of a linear operator. 1761 1762 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic() 1763 1764 The assembly routines use coordinate format, with num_entries tuples of the 1765 form (i, j, value) which indicate that value should be added to the matrix 1766 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1767 This function returns the values of the nonzero entries to be added, their 1768 (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1769 1770 This will generally be slow unless your operator is low-order. 1771 1772 Note: Calling this function asserts that setup is complete 1773 and sets the CeedOperator as immutable. 
1774 1775 @param[in] op CeedOperator to assemble 1776 @param[out] values Values to assemble into matrix 1777 1778 @ref User 1779 **/ 1780 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1781 int ierr; 1782 CeedInt num_suboperators, single_entries = 0; 1783 CeedOperator *sub_operators; 1784 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1785 1786 // Use backend version, if available 1787 if (op->LinearAssemble) { 1788 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1789 return CEED_ERROR_SUCCESS; 1790 } else { 1791 // Check for valid fallback resource 1792 const char *resource, *fallback_resource; 1793 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1794 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1795 CeedChk(ierr); 1796 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1797 // Fallback to reference Ceed 1798 if (!op->op_fallback) { 1799 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1800 } 1801 // Assemble 1802 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1803 return CEED_ERROR_SUCCESS; 1804 } 1805 } 1806 1807 // Default interface implementation 1808 bool is_composite; 1809 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1810 1811 CeedInt offset = 0; 1812 if (is_composite) { 1813 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1814 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1815 for (CeedInt k = 0; k < num_suboperators; ++k) { 1816 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1817 CeedChk(ierr); 1818 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1819 &single_entries); 1820 CeedChk(ierr); 1821 offset += single_entries; 1822 } 1823 } else { 1824 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1825 } 1826 1827 return CEED_ERROR_SUCCESS; 1828 } 1829 1830 /** 1831 @brief Create a multigrid coarse operator and level 
         transfer operators for a CeedOperator, creating the prolongation
           basis from the fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in]  op_fine       Fine grid operator
  @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in]  rstr_coarse   Coarse grid restriction
  @param[in]  basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse     Coarse grid operator
  @param[out] op_prolong    Coarse to fine operator
  @param[out] op_restrict   Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces — fine and coarse levels must
  // share one quadrature rule for the transfer to be well defined
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
    // LCOV_EXCL_STOP

  // Coarse to fine basis
  // For tensor bases the transfer is built in 1D; for non-tensor bases it is
  // built on the full element. Mixing the two is not supported.
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work buffers: copies of the interpolation matrices (the QR factorization
  // below overwrites interp_f in place) and the output interp_c_to_f
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Solve the least-squares problem interp_f * interp_c_to_f = interp_c
  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c (back substitution on the
  //    upper-triangular R stored in the top P_f x P_f block of interp_f)
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in]  op_fine        Fine grid operator
  @param[in]  p_mult_fine    L-vector multiplicity in parallel gather/scatter
  @param[in]  rstr_coarse    Coarse grid restriction
  @param[in]  basis_coarse   Coarse grid active vector basis
  @param[in]  interp_c_to_f  Matrix for coarse to fine interpolation
  @param[out] op_coarse      Coarse grid operator
  @param[out] op_prolong     Coarse to fine operator
  @param[out] op_restrict    Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
    CeedOperator *op_prolong, CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
    // LCOV_EXCL_STOP

  // Coarse to fine basis
  CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  // Recover the 1D coarse node count from the element size.
  // NOTE(review): sqrt/cbrt results truncate toward zero on conversion to
  //   CeedInt; assumes num_nodes_c is an exact dim-th power and the floating
  //   point result is not slightly below the integer — confirm
  P_1d_c = dim == 1 ? num_nodes_c :
           dim == 2 ? sqrt(num_nodes_c) :
           cbrt(num_nodes_c);
  // Only the interpolation matrix is meaningful for the transfer basis; the
  // gradient, reference coordinates, and weights are zero-filled placeholders
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
                                 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a non-tensor basis for the active vector

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.
2019 2020 @param[in] op_fine Fine grid operator 2021 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2022 @param[in] rstr_coarse Coarse grid restriction 2023 @param[in] basis_coarse Coarse grid active vector basis 2024 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2025 @param[out] op_coarse Coarse grid operator 2026 @param[out] op_prolong Coarse to fine operator 2027 @param[out] op_restrict Fine to coarse operator 2028 2029 @return An error code: 0 - success, otherwise - failure 2030 2031 @ref User 2032 **/ 2033 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 2034 CeedVector p_mult_fine, 2035 CeedElemRestriction rstr_coarse, 2036 CeedBasis basis_coarse, 2037 const CeedScalar *interp_c_to_f, 2038 CeedOperator *op_coarse, 2039 CeedOperator *op_prolong, 2040 CeedOperator *op_restrict) { 2041 int ierr; 2042 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2043 Ceed ceed; 2044 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2045 2046 // Check for compatible quadrature spaces 2047 CeedBasis basis_fine; 2048 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2049 CeedInt Q_f, Q_c; 2050 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2051 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2052 if (Q_f != Q_c) 2053 // LCOV_EXCL_START 2054 return CeedError(ceed, CEED_ERROR_DIMENSION, 2055 "Bases must have compatible quadrature spaces"); 2056 // LCOV_EXCL_STOP 2057 2058 // Coarse to fine basis 2059 CeedElemTopology topo; 2060 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 2061 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2062 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2063 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2064 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 2065 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2066 
CeedChk(ierr); 2067 CeedScalar *q_ref, *q_weight, *grad; 2068 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 2069 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 2070 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 2071 CeedBasis basis_c_to_f; 2072 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 2073 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2074 CeedChk(ierr); 2075 ierr = CeedFree(&q_ref); CeedChk(ierr); 2076 ierr = CeedFree(&q_weight); CeedChk(ierr); 2077 ierr = CeedFree(&grad); CeedChk(ierr); 2078 2079 // Core code 2080 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2081 basis_coarse, basis_c_to_f, op_coarse, 2082 op_prolong, op_restrict); 2083 CeedChk(ierr); 2084 return CEED_ERROR_SUCCESS; 2085 } 2086 2087 /** 2088 @brief Build a FDM based approximate inverse for each element for a 2089 CeedOperator 2090 2091 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2092 Method based approximate inverse. This function obtains the simultaneous 2093 diagonalization for the 1D mass and Laplacian operators, 2094 M = V^T V, K = V^T S V. 2095 The assembled QFunction is used to modify the eigenvalues from simultaneous 2096 diagonalization and obtain an approximate inverse of the form 2097 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2098 associated CeedQFunction must therefore also be linear. 2099 2100 Note: Calling this function asserts that setup is complete 2101 and sets the CeedOperator as immutable. 
2102 2103 @param op CeedOperator to create element inverses 2104 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2105 for each element 2106 @param request Address of CeedRequest for non-blocking completion, else 2107 @ref CEED_REQUEST_IMMEDIATE 2108 2109 @return An error code: 0 - success, otherwise - failure 2110 2111 @ref User 2112 **/ 2113 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2114 CeedRequest *request) { 2115 int ierr; 2116 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2117 2118 // Use backend version, if available 2119 if (op->CreateFDMElementInverse) { 2120 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2121 return CEED_ERROR_SUCCESS; 2122 } else { 2123 // Check for valid fallback resource 2124 const char *resource, *fallback_resource; 2125 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2126 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2127 CeedChk(ierr); 2128 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2129 // Fallback to reference Ceed 2130 if (!op->op_fallback) { 2131 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2132 } 2133 // Assemble 2134 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2135 CeedChk(ierr); 2136 return CEED_ERROR_SUCCESS; 2137 } 2138 } 2139 2140 // Interface implementation 2141 Ceed ceed, ceed_parent; 2142 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2143 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2144 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2145 CeedQFunction qf; 2146 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2147 2148 // Determine active input basis 2149 bool interp = false, grad = false; 2150 CeedBasis basis = NULL; 2151 CeedElemRestriction rstr = NULL; 2152 CeedOperatorField *op_fields; 2153 CeedQFunctionField *qf_fields; 2154 CeedInt num_input_fields; 2155 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2156 CeedChk(ierr); 2157 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2158 for (CeedInt i=0; i<num_input_fields; i++) { 2159 CeedVector vec; 2160 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2161 if (vec == CEED_VECTOR_ACTIVE) { 2162 CeedEvalMode eval_mode; 2163 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2164 interp = interp || eval_mode == CEED_EVAL_INTERP; 2165 grad = grad || eval_mode == CEED_EVAL_GRAD; 2166 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2167 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2168 } 2169 } 2170 if (!basis) 2171 // LCOV_EXCL_START 2172 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2173 // LCOV_EXCL_STOP 2174 CeedSize l_size = 1; 2175 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2176 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2177 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2178 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2179 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2180 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2181 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2182 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2183 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2184 2185 // Build and diagonalize 1D Mass and Laplacian 2186 
bool tensor_basis; 2187 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2188 if (!tensor_basis) 2189 // LCOV_EXCL_START 2190 return CeedError(ceed, CEED_ERROR_BACKEND, 2191 "FDMElementInverse only supported for tensor " 2192 "bases"); 2193 // LCOV_EXCL_STOP 2194 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2195 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2196 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2197 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2198 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2199 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2200 // -- Build matrices 2201 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2202 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2203 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2204 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2205 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2206 mass, laplace); CeedChk(ierr); 2207 2208 // -- Diagonalize 2209 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2210 CeedChk(ierr); 2211 ierr = CeedFree(&mass); CeedChk(ierr); 2212 ierr = CeedFree(&laplace); CeedChk(ierr); 2213 for (CeedInt i=0; i<P_1d; i++) 2214 for (CeedInt j=0; j<P_1d; j++) 2215 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2216 ierr = CeedFree(&x); CeedChk(ierr); 2217 2218 // Assemble QFunction 2219 CeedVector assembled; 2220 CeedElemRestriction rstr_qf; 2221 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2222 &rstr_qf, request); CeedChk(ierr); 2223 CeedInt layout[3]; 2224 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2225 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2226 CeedScalar max_norm = 0; 2227 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2228 2229 // Calculate element averages 2230 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2231 CeedScalar 
*elem_avg; 2232 const CeedScalar *assembled_array, *q_weight_array; 2233 CeedVector q_weight; 2234 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2235 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2236 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2237 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2238 CeedChk(ierr); 2239 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2240 CeedChk(ierr); 2241 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2242 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2243 for (CeedInt e=0; e<num_elem; e++) { 2244 CeedInt count = 0; 2245 for (CeedInt q=0; q<num_qpts; q++) 2246 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2247 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2248 qf_value_bound) { 2249 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2250 q_weight_array[q]; 2251 count++; 2252 } 2253 if (count) { 2254 elem_avg[e] /= count; 2255 } else { 2256 elem_avg[e] = 1.0; 2257 } 2258 } 2259 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2260 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2261 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2262 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2263 2264 // Build FDM diagonal 2265 CeedVector q_data; 2266 CeedScalar *q_data_array, *fdm_diagonal; 2267 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2268 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2269 for (CeedInt c=0; c<num_comp; c++) 2270 for (CeedInt n=0; n<elem_size; n++) { 2271 if (interp) 2272 fdm_diagonal[c*elem_size + n] = 1.0; 2273 if (grad) 2274 for (CeedInt d=0; d<dim; d++) { 2275 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2276 fdm_diagonal[c*elem_size + n] += lambda[i]; 2277 } 2278 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2279 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2280 } 2281 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2282 CeedChk(ierr); 2283 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2284 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2285 CeedChk(ierr); 2286 for (CeedInt e=0; e<num_elem; e++) 2287 for (CeedInt c=0; c<num_comp; c++) 2288 for (CeedInt n=0; n<elem_size; n++) 2289 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2290 fdm_diagonal[c*elem_size + n]); 2291 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2292 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2293 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2294 2295 // Setup FDM operator 2296 // -- Basis 2297 CeedBasis fdm_basis; 2298 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2299 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2300 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2301 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2302 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2303 fdm_interp, grad_dummy, q_ref_dummy, 2304 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2305 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2306 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2307 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2308 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2309 ierr = CeedFree(&lambda); CeedChk(ierr); 2310 2311 // -- Restriction 2312 CeedElemRestriction rstr_qd_i; 2313 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2314 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2315 num_comp, num_elem*num_comp*elem_size, 2316 strides, &rstr_qd_i); CeedChk(ierr); 2317 // -- QFunction 2318 CeedQFunction qf_fdm; 2319 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2320 CeedChk(ierr); 2321 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2322 
CeedChk(ierr); 2323 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2324 CeedChk(ierr); 2325 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2326 CeedChk(ierr); 2327 ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr); 2328 // -- QFunction context 2329 CeedInt *num_comp_data; 2330 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2331 num_comp_data[0] = num_comp; 2332 CeedQFunctionContext ctx_fdm; 2333 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2334 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2335 sizeof(*num_comp_data), num_comp_data); 2336 CeedChk(ierr); 2337 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2338 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2339 // -- Operator 2340 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2341 CeedChk(ierr); 2342 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2343 CeedChk(ierr); 2344 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2345 q_data); CeedChk(ierr); 2346 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2347 CeedChk(ierr); 2348 2349 // Cleanup 2350 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2351 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2352 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2353 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2354 2355 return CEED_ERROR_SUCCESS; 2356 } 2357 2358 /// @} 2359