// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 op->op_fallback = op_ref; 
79 80 // Clone QF 81 CeedQFunction qf_ref; 82 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 83 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 84 qf_ref->data = NULL; 85 qf_ref->ceed = ceed_ref; 86 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 87 op_ref->qf = qf_ref; 88 op->qf_fallback = qf_ref; 89 return CEED_ERROR_SUCCESS; 90 } 91 92 /** 93 @brief Select correct basis matrix pointer based on CeedEvalMode 94 95 @param[in] eval_mode Current basis evaluation mode 96 @param[in] identity Pointer to identity matrix 97 @param[in] interp Pointer to interpolation matrix 98 @param[in] grad Pointer to gradient matrix 99 @param[out] basis_ptr Basis pointer to set 100 101 @return none 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 
CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 148 *pointblock_offsets; 149 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 151 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 152 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 153 CeedInt shift = num_comp; 154 if (comp_stride != 1) 155 shift *= num_comp; 156 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 157 CeedChk(ierr); 158 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 159 pointblock_offsets[i] = offsets[i]*shift; 160 if (pointblock_offsets[i] > max) 161 max = pointblock_offsets[i]; 162 } 163 164 // Create new restriction 165 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 166 1, max + num_comp*num_comp, CEED_MEM_HOST, 167 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 168 CeedChk(ierr); 169 170 // Cleanup 171 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 172 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else 181 CEED_REQUEST_IMMEDIATE 182 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 183 @param[out] assembled CeedVector to store assembled diagonal 184 185 @return An error code: 0 - success, otherwise - failure 186 187 @ref Developer 188 **/ 189 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 190 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 191 int ierr; 192 Ceed ceed; 193 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 194 195 // Assemble QFunction 196 CeedQFunction qf; 197 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 198 CeedInt 
num_input_fields, num_output_fields; 199 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 200 CeedChk(ierr); 201 CeedVector assembled_qf; 202 CeedElemRestriction rstr; 203 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 204 &rstr, request); CeedChk(ierr); 205 CeedInt layout[3]; 206 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 207 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 208 CeedScalar max_norm = 0; 209 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 210 211 // Determine active input basis 212 CeedOperatorField *op_fields; 213 CeedQFunctionField *qf_fields; 214 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 215 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 216 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 217 CeedEvalMode *eval_mode_in = NULL; 218 CeedBasis basis_in = NULL; 219 CeedElemRestriction rstr_in = NULL; 220 for (CeedInt i=0; i<num_input_fields; i++) { 221 CeedVector vec; 222 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 223 if (vec == CEED_VECTOR_ACTIVE) { 224 CeedElemRestriction rstr; 225 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 226 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 227 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 228 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 229 if (rstr_in && rstr_in != rstr) 230 // LCOV_EXCL_START 231 return CeedError(ceed, CEED_ERROR_BACKEND, 232 "Multi-field non-composite operator diagonal assembly not supported"); 233 // LCOV_EXCL_STOP 234 rstr_in = rstr; 235 CeedEvalMode eval_mode; 236 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 237 switch (eval_mode) { 238 case CEED_EVAL_NONE: 239 case CEED_EVAL_INTERP: 240 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); 
CeedChk(ierr); 241 eval_mode_in[num_eval_mode_in] = eval_mode; 242 num_eval_mode_in += 1; 243 break; 244 case CEED_EVAL_GRAD: 245 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 246 for (CeedInt d=0; d<dim; d++) 247 eval_mode_in[num_eval_mode_in+d] = eval_mode; 248 num_eval_mode_in += dim; 249 break; 250 case CEED_EVAL_WEIGHT: 251 case CEED_EVAL_DIV: 252 case CEED_EVAL_CURL: 253 break; // Caught by QF Assembly 254 } 255 } 256 } 257 258 // Determine active output basis 259 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 260 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 261 CeedInt num_eval_mode_out = 0; 262 CeedEvalMode *eval_mode_out = NULL; 263 CeedBasis basis_out = NULL; 264 CeedElemRestriction rstr_out = NULL; 265 for (CeedInt i=0; i<num_output_fields; i++) { 266 CeedVector vec; 267 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 268 if (vec == CEED_VECTOR_ACTIVE) { 269 CeedElemRestriction rstr; 270 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 271 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 272 if (rstr_out && rstr_out != rstr) 273 // LCOV_EXCL_START 274 return CeedError(ceed, CEED_ERROR_BACKEND, 275 "Multi-field non-composite operator diagonal assembly not supported"); 276 // LCOV_EXCL_STOP 277 rstr_out = rstr; 278 CeedEvalMode eval_mode; 279 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 280 switch (eval_mode) { 281 case CEED_EVAL_NONE: 282 case CEED_EVAL_INTERP: 283 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 284 eval_mode_out[num_eval_mode_out] = eval_mode; 285 num_eval_mode_out += 1; 286 break; 287 case CEED_EVAL_GRAD: 288 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 289 for (CeedInt d=0; d<dim; d++) 290 eval_mode_out[num_eval_mode_out+d] = eval_mode; 291 num_eval_mode_out += dim; 292 
break; 293 case CEED_EVAL_WEIGHT: 294 case CEED_EVAL_DIV: 295 case CEED_EVAL_CURL: 296 break; // Caught by QF Assembly 297 } 298 } 299 } 300 301 // Assemble point block diagonal restriction, if needed 302 CeedElemRestriction diag_rstr = rstr_out; 303 if (is_pointblock) { 304 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 305 CeedChk(ierr); 306 } 307 308 // Create diagonal vector 309 CeedVector elem_diag; 310 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 311 CeedChk(ierr); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array; 315 const CeedScalar *assembled_qf_array; 316 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 317 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 318 CeedChk(ierr); 319 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 320 CeedChk(ierr); 321 CeedInt num_elem, num_nodes, num_qpts; 322 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 323 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 324 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 325 // Basis matrices 326 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 327 CeedScalar *identity = NULL; 328 bool evalNone = false; 329 for (CeedInt i=0; i<num_eval_mode_in; i++) 330 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 331 for (CeedInt i=0; i<num_eval_mode_out; i++) 332 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 333 if (evalNone) { 334 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 335 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 336 identity[i*num_nodes+i] = 1.0; 337 } 338 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 339 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 340 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 341 ierr = 
CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 342 // Compute the diagonal of B^T D B 343 // Each element 344 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 345 for (CeedInt e=0; e<num_elem; e++) { 346 CeedInt d_out = -1; 347 // Each basis eval mode pair 348 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 349 const CeedScalar *bt = NULL; 350 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 351 d_out += 1; 352 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 353 &grad_out[d_out*num_qpts*num_nodes], &bt); 354 CeedInt d_in = -1; 355 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 356 const CeedScalar *b = NULL; 357 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 358 d_in += 1; 359 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 360 &grad_in[d_in*num_qpts*num_nodes], &b); 361 // Each component 362 for (CeedInt c_out=0; c_out<num_comp; c_out++) 363 // Each qpoint/node pair 364 for (CeedInt q=0; q<num_qpts; q++) 365 if (is_pointblock) { 366 // Point Block Diagonal 367 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 368 const CeedScalar qf_value = 369 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 370 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 371 if (fabs(qf_value) > qf_value_bound) 372 for (CeedInt n=0; n<num_nodes; n++) 373 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 374 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 375 } 376 } else { 377 // Diagonal Only 378 const CeedScalar qf_value = 379 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 380 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 381 if (fabs(qf_value) > qf_value_bound) 382 for (CeedInt n=0; n<num_nodes; n++) 383 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 384 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 385 } 386 } 387 } 388 } 389 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 390 ierr = 
CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 391 CeedChk(ierr); 392 393 // Assemble local operator diagonal 394 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 395 assembled, request); CeedChk(ierr); 396 397 // Cleanup 398 if (is_pointblock) { 399 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 400 } 401 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 402 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 403 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 404 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 405 ierr = CeedFree(&identity); CeedChk(ierr); 406 407 return CEED_ERROR_SUCCESS; 408 } 409 410 /** 411 @brief Core logic for assembling composite operator diagonal 412 413 @param[in] op CeedOperator to assemble point block diagonal 414 @param[in] request Address of CeedRequest for non-blocking completion, else 415 CEED_REQUEST_IMMEDIATE 416 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 417 @param[out] assembled CeedVector to store assembled diagonal 418 419 @return An error code: 0 - success, otherwise - failure 420 421 @ref Developer 422 **/ 423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 424 CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 int ierr; 427 CeedInt num_sub; 428 CeedOperator *suboperators; 429 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 430 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 431 for (CeedInt i = 0; i < num_sub; i++) { 432 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 433 is_pointblock, assembled); CeedChk(ierr); 434 } 435 return CEED_ERROR_SUCCESS; 436 } 437 438 /** 439 @brief Build nonzero pattern for non-composite operator 440 441 Users should generally use CeedOperatorLinearAssembleSymbolic() 442 443 @param[in] op CeedOperator to assemble nonzero pattern 444 @param[in] offset Offset for number of entries 445 
@param[out] rows Row number for each entry 446 @param[out] cols Column number for each entry 447 448 @return An error code: 0 - success, otherwise - failure 449 450 @ref Developer 451 **/ 452 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 453 CeedInt *rows, CeedInt *cols) { 454 int ierr; 455 Ceed ceed = op->ceed; 456 if (op->is_composite) 457 // LCOV_EXCL_START 458 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 459 "Composite operator not supported"); 460 // LCOV_EXCL_STOP 461 462 CeedElemRestriction rstr_in; 463 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 464 CeedInt num_elem, elem_size, num_nodes, num_comp; 465 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 466 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 467 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 468 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 469 CeedInt layout_er[3]; 470 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 471 472 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 473 474 // Determine elem_dof relation 475 CeedVector index_vec; 476 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 477 CeedScalar *array; 478 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 479 for (CeedInt i = 0; i < num_nodes; ++i) { 480 array[i] = i; 481 } 482 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 483 CeedVector elem_dof; 484 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 485 CeedChk(ierr); 486 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 487 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 488 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 489 const CeedScalar *elem_dof_a; 490 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 491 
CeedChk(ierr); 492 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 493 494 // Determine i, j locations for element matrices 495 CeedInt count = 0; 496 for (int e = 0; e < num_elem; ++e) { 497 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 498 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 499 for (int i = 0; i < elem_size; ++i) { 500 for (int j = 0; j < elem_size; ++j) { 501 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 502 (comp_out)*layout_er[1] + e*layout_er[2]; 503 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 504 (comp_in)*layout_er[1] + e*layout_er[2]; 505 506 const CeedInt row = elem_dof_a[elem_dof_index_row]; 507 const CeedInt col = elem_dof_a[elem_dof_index_col]; 508 509 rows[offset + count] = row; 510 cols[offset + count] = col; 511 count++; 512 } 513 } 514 } 515 } 516 } 517 if (count != local_num_entries) 518 // LCOV_EXCL_START 519 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 520 // LCOV_EXCL_STOP 521 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 522 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 523 524 return CEED_ERROR_SUCCESS; 525 } 526 527 /** 528 @brief Assemble nonzero entries for non-composite operator 529 530 Users should generally use CeedOperatorLinearAssemble() 531 532 @param[in] op CeedOperator to assemble 533 @param[out] offset Offest for number of entries 534 @param[out] values Values to assemble into matrix 535 536 @return An error code: 0 - success, otherwise - failure 537 538 @ref Developer 539 **/ 540 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 541 CeedVector values) { 542 int ierr; 543 Ceed ceed = op->ceed; 544 if (op->is_composite) 545 // LCOV_EXCL_START 546 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 547 "Composite operator not supported"); 548 // LCOV_EXCL_STOP 549 550 // Assemble QFunction 551 CeedQFunction qf; 552 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 553 CeedVector 
assembled_qf; 554 CeedElemRestriction rstr_q; 555 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 556 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 557 558 CeedInt qf_length; 559 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 560 561 CeedInt num_input_fields, num_output_fields; 562 CeedOperatorField *input_fields; 563 CeedOperatorField *output_fields; 564 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 565 &num_output_fields, &output_fields); CeedChk(ierr); 566 567 // Determine active input basis 568 CeedQFunctionField *qf_fields; 569 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 570 CeedInt num_eval_mode_in = 0, dim = 1; 571 CeedEvalMode *eval_mode_in = NULL; 572 CeedBasis basis_in = NULL; 573 CeedElemRestriction rstr_in = NULL; 574 for (CeedInt i=0; i<num_input_fields; i++) { 575 CeedVector vec; 576 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 577 if (vec == CEED_VECTOR_ACTIVE) { 578 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 579 CeedChk(ierr); 580 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 581 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 582 CeedChk(ierr); 583 CeedEvalMode eval_mode; 584 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 585 CeedChk(ierr); 586 switch (eval_mode) { 587 case CEED_EVAL_NONE: 588 case CEED_EVAL_INTERP: 589 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 590 eval_mode_in[num_eval_mode_in] = eval_mode; 591 num_eval_mode_in += 1; 592 break; 593 case CEED_EVAL_GRAD: 594 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 595 for (CeedInt d=0; d<dim; d++) { 596 eval_mode_in[num_eval_mode_in+d] = eval_mode; 597 } 598 num_eval_mode_in += dim; 599 break; 600 case CEED_EVAL_WEIGHT: 601 case CEED_EVAL_DIV: 602 case CEED_EVAL_CURL: 603 break; // Caught by QF Assembly 604 } 605 } 606 } 607 608 
// Determine active output basis 609 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 610 CeedInt num_eval_mode_out = 0; 611 CeedEvalMode *eval_mode_out = NULL; 612 CeedBasis basis_out = NULL; 613 CeedElemRestriction rstr_out = NULL; 614 for (CeedInt i=0; i<num_output_fields; i++) { 615 CeedVector vec; 616 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 617 if (vec == CEED_VECTOR_ACTIVE) { 618 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 619 CeedChk(ierr); 620 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 621 CeedChk(ierr); 622 CeedEvalMode eval_mode; 623 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 624 CeedChk(ierr); 625 switch (eval_mode) { 626 case CEED_EVAL_NONE: 627 case CEED_EVAL_INTERP: 628 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 629 eval_mode_out[num_eval_mode_out] = eval_mode; 630 num_eval_mode_out += 1; 631 break; 632 case CEED_EVAL_GRAD: 633 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 634 for (CeedInt d=0; d<dim; d++) { 635 eval_mode_out[num_eval_mode_out+d] = eval_mode; 636 } 637 num_eval_mode_out += dim; 638 break; 639 case CEED_EVAL_WEIGHT: 640 case CEED_EVAL_DIV: 641 case CEED_EVAL_CURL: 642 break; // Caught by QF Assembly 643 } 644 } 645 } 646 647 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 648 // LCOV_EXCL_START 649 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 650 "Cannot assemble operator with out inputs/outputs"); 651 // LCOV_EXCL_STOP 652 653 CeedInt num_elem, elem_size, num_qpts, num_comp; 654 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 655 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 656 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 657 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 658 659 CeedInt local_num_entries = 
elem_size*num_comp * elem_size*num_comp * num_elem; 660 661 // loop over elements and put in data structure 662 const CeedScalar *interp_in, *grad_in; 663 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 664 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 665 666 const CeedScalar *assembled_qf_array; 667 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 668 CeedChk(ierr); 669 670 CeedInt layout_qf[3]; 671 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 672 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 673 674 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 675 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 676 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 677 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 678 num_qpts]; // logically 3-tensor 679 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 680 CeedScalar elem_mat[elem_size * elem_size]; 681 int count = 0; 682 CeedScalar *vals; 683 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 684 for (int e = 0; e < num_elem; ++e) { 685 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 686 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 687 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 688 B_mat_in[ell] = 0.0; 689 } 690 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 691 B_mat_out[ell] = 0.0; 692 } 693 // Store block-diagonal D matrix as collection of small dense blocks 694 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 695 D_mat[ell] = 0.0; 696 } 697 // form element matrix itself (for each block component) 698 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 699 elem_mat[ell] = 0.0; 700 } 701 for (int q = 0; q < num_qpts; ++q) { 702 for (int n = 0; n < elem_size; ++n) { 703 CeedInt d_in = -1; 704 for (int e_in = 0; e_in < 
num_eval_mode_in; ++e_in) { 705 const int qq = num_eval_mode_in*q; 706 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 707 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 708 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 709 d_in += 1; 710 B_mat_in[(qq+e_in)*elem_size + n] += 711 grad_in[(d_in*num_qpts+q) * elem_size + n]; 712 } else { 713 // LCOV_EXCL_START 714 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 715 // LCOV_EXCL_STOP 716 } 717 } 718 CeedInt d_out = -1; 719 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 720 const int qq = num_eval_mode_out*q; 721 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 722 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 723 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 724 d_out += 1; 725 B_mat_out[(qq+e_out)*elem_size + n] += 726 grad_in[(d_out*num_qpts+q) * elem_size + n]; 727 } else { 728 // LCOV_EXCL_START 729 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 730 // LCOV_EXCL_STOP 731 } 732 } 733 } 734 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 735 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 736 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 737 +comp_out; 738 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 739 e*layout_qf[2]; 740 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 741 } 742 } 743 } 744 // Compute B^T*D 745 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 746 BTD[ell] = 0.0; 747 } 748 for (int j = 0; j<elem_size; ++j) { 749 for (int q = 0; q<num_qpts; ++q) { 750 int qq = num_eval_mode_out*q; 751 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 752 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 753 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 754 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 755 } 756 } 757 } 758 } 759 760 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, 
elem_mat, elem_size, 761 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 762 763 // put element matrix in coordinate data structure 764 for (int i = 0; i < elem_size; ++i) { 765 for (int j = 0; j < elem_size; ++j) { 766 vals[offset + count] = elem_mat[i*elem_size + j]; 767 count++; 768 } 769 } 770 } 771 } 772 } 773 if (count != local_num_entries) 774 // LCOV_EXCL_START 775 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 776 // LCOV_EXCL_STOP 777 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 778 779 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 780 CeedChk(ierr); 781 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 782 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 783 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 784 785 return CEED_ERROR_SUCCESS; 786 } 787 788 /** 789 @brief Count number of entries for assembled CeedOperator 790 791 @param[in] op CeedOperator to assemble 792 @param[out] num_entries Number of entries in assembled representation 793 794 @return An error code: 0 - success, otherwise - failure 795 796 @ref Utility 797 **/ 798 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 799 CeedInt *num_entries) { 800 int ierr; 801 CeedElemRestriction rstr; 802 CeedInt num_elem, elem_size, num_comp; 803 804 if (op->is_composite) 805 // LCOV_EXCL_START 806 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 807 "Composite operator not supported"); 808 // LCOV_EXCL_STOP 809 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 810 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 811 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 812 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 813 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 814 815 return CEED_ERROR_SUCCESS; 816 } 817 818 /** 819 @brief Common code for creating a multigrid coarse operator and level 820 
           transfer operators for a CeedOperator

  @param[in] op_fine      Fine grid operator
  @param[in] p_mult_fine  L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse  Coarse grid restriction
  @param[in] basis_coarse Coarse grid active vector basis
  @param[in] basis_c_to_f Basis for coarse to fine interpolation
  @param[out] op_coarse   Coarse grid operator
  @param[out] op_prolong  Coarse to fine operator
  @param[out] op_restrict Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields; the active field swaps to the coarse restriction/basis
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }

  // Multiplicity vector
  // E-vector round trip counts node multiplicity; the reciprocal is used to
  // scale "scale" inputs of the transfer operators below
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  // Context takes ownership of num_comp_r_data (CEED_OWN_POINTER)
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in] interp_1d   Interpolation matrix in one dimension
  @param[in] grad_1d     Gradient matrix in one dimension
  @param[in] q_weight_1d Quadrature weights in one dimension
  @param[in] P_1d        Number of basis nodes in one dimension
  @param[in] Q_1d        Number of quadrature points in one dimension
  @param[in] dim         Dimension of basis
  @param[out] mass       Assembled mass matrix in one dimension
  @param[out] laplace    Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass: M_ij = sum_k B_ki w_k B_kj
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Small diagonal perturbation so the (otherwise singular) Laplacian is invertible
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
    For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
    'v', provided in that order, would result in an assembled QFunction that
    consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
    on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedQFunction at
                          quadrature points
  @param[out] rstr      CeedElemRestriction for CeedVector containing assembled
                          CeedQFunction
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
                                        CeedElemRestriction *rstr,
                                        CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunction) {
    ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
    CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble via the reference implementation's clone of this operator
    ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
                                               rstr, request); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble CeedQFunction and store result internally. Return copied
           references of stored data to the caller. Caller is responsible for
           ownership and destruction of the copied references.
           See also @ref CeedOperatorLinearAssembleQFunction

  @param op        CeedOperator to assemble CeedQFunction
  @param assembled CeedVector to store assembled CeedQFunction at
                     quadrature points
  @param rstr      CeedElemRestriction for CeedVector containing assembled
                     CeedQFunction
  @param request   Address of CeedRequest for non-blocking completion, else
                     @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    if (op->has_qf_assembled) {
      // Cached assembly exists - only update the values in place
      ierr = op->LinearAssembleQFunctionUpdate(op, op->qf_assembled,
                                               op->qf_assembled_rstr, request);
    } else {
      // First call - build and cache the assembled representation
      // NOTE(review): assumes a backend providing LinearAssembleQFunctionUpdate
      //   also provides LinearAssembleQFunction; confirm against backend contract
      ierr = op->LinearAssembleQFunction(op, &op->qf_assembled,
                                         &op->qf_assembled_rstr, request);
    }
    CeedChk(ierr);
    op->has_qf_assembled = true;
    // Copy reference to internally held copy; caller owns these references
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(op->qf_assembled, assembled); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(op->qf_assembled_rstr, rstr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }
  CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Zero the output, then reuse the backend "add" variant
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
                                          CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  // (false => assemble scalar diagonal, not point block diagonal)
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
  @param request        Address of CeedRequest for non-blocking completion, else
                          CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Zero the output, then reuse the "add" variant
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
  @param request        Address of CeedRequest for non-blocking completion, else
                          CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  // (true => assemble point block diagonal rather than scalar diagonal)
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op           CeedOperator to assemble
  @param[out] num_entries Number of entries in coordinate nonzero pattern
  @param[out] rows        Row number for each entry
  @param[out] cols        Column number for each entry

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations; each sub-operator writes at its own offset
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op      CeedOperator to assemble
  @param[out] values Values to assemble into matrix

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // Offsets must match those used by CeedOperatorLinearAssembleSymbolic()
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level
           transfer operators
           for a CeedOperator, creating the prolongation basis from the
           fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine      Fine grid operator
  @param[in] p_mult_fine  L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse  Coarse grid restriction
  @param[in] basis_coarse Coarse grid active vector basis
  @param[out] op_coarse   Coarse grid operator
  @param[out] op_prolong  Coarse to fine operator
  @param[out] op_restrict Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    // Tensor bases: work with the 1D interpolation matrices
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c (back substitution on R)
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
    CeedOperator *op_prolong, CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1695 CeedInt Q_f, Q_c; 1696 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1697 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1698 if (Q_f != Q_c) 1699 // LCOV_EXCL_START 1700 return CeedError(ceed, CEED_ERROR_DIMENSION, 1701 "Bases must have compatible quadrature spaces"); 1702 // LCOV_EXCL_STOP 1703 1704 // Coarse to fine basis 1705 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1706 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1707 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1708 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1709 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1710 CeedChk(ierr); 1711 P_1d_c = dim == 1 ? num_nodes_c : 1712 dim == 2 ? sqrt(num_nodes_c) : 1713 cbrt(num_nodes_c); 1714 CeedScalar *q_ref, *q_weight, *grad; 1715 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1716 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1717 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1718 CeedBasis basis_c_to_f; 1719 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1720 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1721 CeedChk(ierr); 1722 ierr = CeedFree(&q_ref); CeedChk(ierr); 1723 ierr = CeedFree(&q_weight); CeedChk(ierr); 1724 ierr = CeedFree(&grad); CeedChk(ierr); 1725 1726 // Core code 1727 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1728 basis_coarse, basis_c_to_f, op_coarse, 1729 op_prolong, op_restrict); 1730 CeedChk(ierr); 1731 return CEED_ERROR_SUCCESS; 1732 } 1733 1734 /** 1735 @brief Create a multigrid coarse operator and level transfer operators 1736 for a CeedOperator with a non-tensor basis for the active vector 1737 1738 Note: Calling this function asserts that setup is complete 1739 and sets the CeedOperator as immutable. 

  @param[in]  op_fine       Fine grid operator
  @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in]  rstr_coarse   Coarse grid restriction
  @param[in]  basis_coarse  Coarse grid active vector basis
  @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse     Coarse grid operator
  @param[out] op_prolong    Coarse to fine operator
  @param[out] op_restrict   Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
                                       CeedVector p_mult_fine,
                                       CeedElemRestriction rstr_coarse,
                                       CeedBasis basis_coarse,
                                       const CeedScalar *interp_c_to_f,
                                       CeedOperator *op_coarse,
                                       CeedOperator *op_prolong,
                                       CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  // (fine and coarse bases must agree on the total quadrature point count)
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // Non-tensor path: the coarse-to-fine basis is built on the fine basis
  // topology with total (not 1D) node counts
  CeedElemTopology topo;
  ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
  CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  // q_ref/q_weight/grad are zero-filled placeholder arrays for the
  // coarse-to-fine basis metadata (CeedCalloc zero-initializes)
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
                           interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build a FDM based approximate inverse for each element for a
           CeedOperator

  This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
    Method based approximate inverse. This function obtains the simultaneous
    diagonalization for the 1D mass and Laplacian operators,
    M = V^T V, K = V^T S V.
    The assembled QFunction is used to modify the eigenvalues from simultaneous
    diagonalization and obtain an approximate inverse of the form
    V^T S^hat V. The CeedOperator must be linear and non-composite. The
    associated CeedQFunction must therefore also be linear.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1823 1824 @param op CeedOperator to create element inverses 1825 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 1826 for each element 1827 @param request Address of CeedRequest for non-blocking completion, else 1828 @ref CEED_REQUEST_IMMEDIATE 1829 1830 @return An error code: 0 - success, otherwise - failure 1831 1832 @ref Backend 1833 **/ 1834 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 1835 CeedRequest *request) { 1836 int ierr; 1837 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1838 1839 // Use backend version, if available 1840 if (op->CreateFDMElementInverse) { 1841 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 1842 return CEED_ERROR_SUCCESS; 1843 } else { 1844 // Check for valid fallback resource 1845 const char *resource, *fallback_resource; 1846 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1847 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1848 CeedChk(ierr); 1849 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1850 // Fallback to reference Ceed 1851 if (!op->op_fallback) { 1852 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1853 } 1854 // Assemble 1855 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 1856 CeedChk(ierr); 1857 return CEED_ERROR_SUCCESS; 1858 } 1859 } 1860 1861 // Interface implementation 1862 Ceed ceed, ceed_parent; 1863 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1864 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 1865 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 1866 CeedQFunction qf; 1867 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 1868 1869 // Determine active input basis 1870 bool interp = false, grad = false; 1871 CeedBasis basis = NULL; 1872 CeedElemRestriction rstr = NULL; 1873 CeedOperatorField *op_fields; 1874 CeedQFunctionField *qf_fields; 1875 CeedInt num_input_fields; 1876 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 1877 CeedChk(ierr); 1878 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 1879 for (CeedInt i=0; i<num_input_fields; i++) { 1880 CeedVector vec; 1881 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 1882 if (vec == CEED_VECTOR_ACTIVE) { 1883 CeedEvalMode eval_mode; 1884 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 1885 interp = interp || eval_mode == CEED_EVAL_INTERP; 1886 grad = grad || eval_mode == CEED_EVAL_GRAD; 1887 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 1888 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 1889 } 1890 } 1891 if (!basis) 1892 // LCOV_EXCL_START 1893 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 1894 // LCOV_EXCL_STOP 1895 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1, 1896 l_size = 1; 1897 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 1898 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 1899 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 1900 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 1901 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 1902 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 1903 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 1904 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 1905 1906 // Build and diagonalize 1D Mass and Laplacian 1907 bool 
tensor_basis; 1908 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 1909 if (!tensor_basis) 1910 // LCOV_EXCL_START 1911 return CeedError(ceed, CEED_ERROR_BACKEND, 1912 "FDMElementInverse only supported for tensor " 1913 "bases"); 1914 // LCOV_EXCL_STOP 1915 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 1916 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 1917 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 1918 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 1919 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 1920 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 1921 // -- Build matrices 1922 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 1923 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 1924 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 1925 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 1926 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 1927 mass, laplace); CeedChk(ierr); 1928 1929 // -- Diagonalize 1930 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 1931 CeedChk(ierr); 1932 ierr = CeedFree(&mass); CeedChk(ierr); 1933 ierr = CeedFree(&laplace); CeedChk(ierr); 1934 for (CeedInt i=0; i<P_1d; i++) 1935 for (CeedInt j=0; j<P_1d; j++) 1936 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 1937 ierr = CeedFree(&x); CeedChk(ierr); 1938 1939 // Assemble QFunction 1940 CeedVector assembled; 1941 CeedElemRestriction rstr_qf; 1942 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 1943 &rstr_qf, request); CeedChk(ierr); 1944 CeedInt layout[3]; 1945 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 1946 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 1947 CeedScalar max_norm = 0; 1948 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 1949 1950 // Calculate element averages 1951 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 1952 CeedScalar 
*elem_avg; 1953 const CeedScalar *assembled_array, *q_weight_array; 1954 CeedVector q_weight; 1955 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 1956 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 1957 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 1958 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 1959 CeedChk(ierr); 1960 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 1961 CeedChk(ierr); 1962 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 1963 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 1964 for (CeedInt e=0; e<num_elem; e++) { 1965 CeedInt count = 0; 1966 for (CeedInt q=0; q<num_qpts; q++) 1967 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 1968 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 1969 qf_value_bound) { 1970 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 1971 q_weight_array[q]; 1972 count++; 1973 } 1974 if (count) { 1975 elem_avg[e] /= count; 1976 } else { 1977 elem_avg[e] = 1.0; 1978 } 1979 } 1980 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 1981 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 1982 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 1983 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 1984 1985 // Build FDM diagonal 1986 CeedVector q_data; 1987 CeedScalar *q_data_array, *fdm_diagonal; 1988 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 1989 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 1990 for (CeedInt c=0; c<num_comp; c++) 1991 for (CeedInt n=0; n<elem_size; n++) { 1992 if (interp) 1993 fdm_diagonal[c*elem_size + n] = 1.0; 1994 if (grad) 1995 for (CeedInt d=0; d<dim; d++) { 1996 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 1997 fdm_diagonal[c*elem_size + n] += lambda[i]; 1998 } 1999 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2000 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2001 } 2002 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2003 CeedChk(ierr); 2004 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2005 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2006 CeedChk(ierr); 2007 for (CeedInt e=0; e<num_elem; e++) 2008 for (CeedInt c=0; c<num_comp; c++) 2009 for (CeedInt n=0; n<elem_size; n++) 2010 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2011 fdm_diagonal[c*elem_size + n]); 2012 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2013 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2014 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2015 2016 // Setup FDM operator 2017 // -- Basis 2018 CeedBasis fdm_basis; 2019 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2020 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2021 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2022 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2023 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2024 fdm_interp, grad_dummy, q_ref_dummy, 2025 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2026 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2027 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2028 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2029 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2030 ierr = CeedFree(&lambda); CeedChk(ierr); 2031 2032 // -- Restriction 2033 CeedElemRestriction rstr_qd_i; 2034 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2035 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2036 num_comp, num_elem*num_comp*elem_size, 2037 strides, &rstr_qd_i); CeedChk(ierr); 2038 // -- QFunction 2039 CeedQFunction qf_fdm; 2040 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2041 CeedChk(ierr); 2042 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2043 
CeedChk(ierr); 2044 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2045 CeedChk(ierr); 2046 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2047 CeedChk(ierr); 2048 // -- QFunction context 2049 CeedInt *num_comp_data; 2050 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2051 num_comp_data[0] = num_comp; 2052 CeedQFunctionContext ctx_fdm; 2053 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2054 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2055 sizeof(*num_comp_data), num_comp_data); 2056 CeedChk(ierr); 2057 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2058 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2059 // -- Operator 2060 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2061 CeedChk(ierr); 2062 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2063 CeedChk(ierr); 2064 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2065 q_data); CeedChk(ierr); 2066 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2067 CeedChk(ierr); 2068 2069 // Cleanup 2070 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2071 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2072 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2073 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2074 2075 return CEED_ERROR_SUCCESS; 2076 } 2077 2078 /// @} 2079