// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 op->op_fallback = op_ref; 
79 80 // Clone QF 81 CeedQFunction qf_ref; 82 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 83 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 84 qf_ref->data = NULL; 85 qf_ref->ceed = ceed_ref; 86 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 87 op_ref->qf = qf_ref; 88 op->qf_fallback = qf_ref; 89 return CEED_ERROR_SUCCESS; 90 } 91 92 /** 93 @brief Select correct basis matrix pointer based on CeedEvalMode 94 95 @param[in] eval_mode Current basis evaluation mode 96 @param[in] identity Pointer to identity matrix 97 @param[in] interp Pointer to interpolation matrix 98 @param[in] grad Pointer to gradient matrix 99 @param[out] basis_ptr Basis pointer to set 100 101 @ref Developer 102 **/ 103 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 104 const CeedScalar *identity, const CeedScalar *interp, 105 const CeedScalar *grad, const CeedScalar **basis_ptr) { 106 switch (eval_mode) { 107 case CEED_EVAL_NONE: 108 *basis_ptr = identity; 109 break; 110 case CEED_EVAL_INTERP: 111 *basis_ptr = interp; 112 break; 113 case CEED_EVAL_GRAD: 114 *basis_ptr = grad; 115 break; 116 case CEED_EVAL_WEIGHT: 117 case CEED_EVAL_DIV: 118 case CEED_EVAL_CURL: 119 break; // Caught by QF Assembly 120 } 121 } 122 123 /** 124 @brief Create point block restriction for active operator field 125 126 @param[in] rstr Original CeedElemRestriction for active field 127 @param[out] pointblock_rstr Address of the variable where the newly created 128 CeedElemRestriction will be stored 129 130 @return An error code: 0 - success, otherwise - failure 131 132 @ref Developer 133 **/ 134 static int CeedOperatorCreateActivePointBlockRestriction( 135 CeedElemRestriction rstr, 136 CeedElemRestriction *pointblock_rstr) { 137 int ierr; 138 Ceed ceed; 139 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 140 const CeedInt *offsets; 141 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 142 CeedChk(ierr); 143 144 // Expand offsets 145 CeedInt num_elem, num_comp, 
elem_size, comp_stride, max = 1, 146 *pointblock_offsets; 147 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 148 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 149 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 151 CeedInt shift = num_comp; 152 if (comp_stride != 1) 153 shift *= num_comp; 154 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 155 CeedChk(ierr); 156 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 157 pointblock_offsets[i] = offsets[i]*shift; 158 if (pointblock_offsets[i] > max) 159 max = pointblock_offsets[i]; 160 } 161 162 // Create new restriction 163 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 164 1, max + num_comp*num_comp, CEED_MEM_HOST, 165 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 166 CeedChk(ierr); 167 168 // Cleanup 169 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 170 171 return CEED_ERROR_SUCCESS; 172 } 173 174 /** 175 @brief Core logic for assembling operator diagonal or point block diagonal 176 177 @param[in] op CeedOperator to assemble point block diagonal 178 @param[in] request Address of CeedRequest for non-blocking completion, else 179 CEED_REQUEST_IMMEDIATE 180 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 181 @param[out] assembled CeedVector to store assembled diagonal 182 183 @return An error code: 0 - success, otherwise - failure 184 185 @ref Developer 186 **/ 187 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 188 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 189 int ierr; 190 Ceed ceed; 191 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 192 193 // Assemble QFunction 194 CeedQFunction qf; 195 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 196 CeedInt num_input_fields, 
num_output_fields; 197 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 198 CeedChk(ierr); 199 CeedVector assembled_qf; 200 CeedElemRestriction rstr; 201 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 202 &rstr, request); CeedChk(ierr); 203 CeedInt layout[3]; 204 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 205 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 206 CeedScalar max_norm = 0; 207 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 208 209 // Determine active input basis 210 CeedOperatorField *op_fields; 211 CeedQFunctionField *qf_fields; 212 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 213 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 214 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 215 CeedEvalMode *eval_mode_in = NULL; 216 CeedBasis basis_in = NULL; 217 CeedElemRestriction rstr_in = NULL; 218 for (CeedInt i=0; i<num_input_fields; i++) { 219 CeedVector vec; 220 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 221 if (vec == CEED_VECTOR_ACTIVE) { 222 CeedElemRestriction rstr; 223 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 224 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 225 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 226 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 227 if (rstr_in && rstr_in != rstr) 228 // LCOV_EXCL_START 229 return CeedError(ceed, CEED_ERROR_BACKEND, 230 "Multi-field non-composite operator diagonal assembly not supported"); 231 // LCOV_EXCL_STOP 232 rstr_in = rstr; 233 CeedEvalMode eval_mode; 234 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 235 switch (eval_mode) { 236 case CEED_EVAL_NONE: 237 case CEED_EVAL_INTERP: 238 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 239 
eval_mode_in[num_eval_mode_in] = eval_mode; 240 num_eval_mode_in += 1; 241 break; 242 case CEED_EVAL_GRAD: 243 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 244 for (CeedInt d=0; d<dim; d++) 245 eval_mode_in[num_eval_mode_in+d] = eval_mode; 246 num_eval_mode_in += dim; 247 break; 248 case CEED_EVAL_WEIGHT: 249 case CEED_EVAL_DIV: 250 case CEED_EVAL_CURL: 251 break; // Caught by QF Assembly 252 } 253 } 254 } 255 256 // Determine active output basis 257 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 258 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 259 CeedInt num_eval_mode_out = 0; 260 CeedEvalMode *eval_mode_out = NULL; 261 CeedBasis basis_out = NULL; 262 CeedElemRestriction rstr_out = NULL; 263 for (CeedInt i=0; i<num_output_fields; i++) { 264 CeedVector vec; 265 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 266 if (vec == CEED_VECTOR_ACTIVE) { 267 CeedElemRestriction rstr; 268 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 269 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 270 if (rstr_out && rstr_out != rstr) 271 // LCOV_EXCL_START 272 return CeedError(ceed, CEED_ERROR_BACKEND, 273 "Multi-field non-composite operator diagonal assembly not supported"); 274 // LCOV_EXCL_STOP 275 rstr_out = rstr; 276 CeedEvalMode eval_mode; 277 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 278 switch (eval_mode) { 279 case CEED_EVAL_NONE: 280 case CEED_EVAL_INTERP: 281 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 282 eval_mode_out[num_eval_mode_out] = eval_mode; 283 num_eval_mode_out += 1; 284 break; 285 case CEED_EVAL_GRAD: 286 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 287 for (CeedInt d=0; d<dim; d++) 288 eval_mode_out[num_eval_mode_out+d] = eval_mode; 289 num_eval_mode_out += dim; 290 break; 291 case 
CEED_EVAL_WEIGHT: 292 case CEED_EVAL_DIV: 293 case CEED_EVAL_CURL: 294 break; // Caught by QF Assembly 295 } 296 } 297 } 298 299 // Assemble point block diagonal restriction, if needed 300 CeedElemRestriction diag_rstr = rstr_out; 301 if (is_pointblock) { 302 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 303 CeedChk(ierr); 304 } 305 306 // Create diagonal vector 307 CeedVector elem_diag; 308 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 309 CeedChk(ierr); 310 311 // Assemble element operator diagonals 312 CeedScalar *elem_diag_array; 313 const CeedScalar *assembled_qf_array; 314 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 315 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 316 CeedChk(ierr); 317 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 318 CeedChk(ierr); 319 CeedInt num_elem, num_nodes, num_qpts; 320 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 321 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 322 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 323 // Basis matrices 324 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 325 CeedScalar *identity = NULL; 326 bool evalNone = false; 327 for (CeedInt i=0; i<num_eval_mode_in; i++) 328 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 329 for (CeedInt i=0; i<num_eval_mode_out; i++) 330 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 331 if (evalNone) { 332 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 333 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 334 identity[i*num_nodes+i] = 1.0; 335 } 336 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 337 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 338 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 339 ierr = CeedBasisGetGrad(basis_out, &grad_out); 
CeedChk(ierr); 340 // Compute the diagonal of B^T D B 341 // Each element 342 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 343 for (CeedInt e=0; e<num_elem; e++) { 344 CeedInt d_out = -1; 345 // Each basis eval mode pair 346 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 347 const CeedScalar *bt = NULL; 348 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 349 d_out += 1; 350 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 351 &grad_out[d_out*num_qpts*num_nodes], &bt); 352 CeedInt d_in = -1; 353 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 354 const CeedScalar *b = NULL; 355 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 356 d_in += 1; 357 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 358 &grad_in[d_in*num_qpts*num_nodes], &b); 359 // Each component 360 for (CeedInt c_out=0; c_out<num_comp; c_out++) 361 // Each qpoint/node pair 362 for (CeedInt q=0; q<num_qpts; q++) 363 if (is_pointblock) { 364 // Point Block Diagonal 365 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 366 const CeedScalar qf_value = 367 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 368 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 369 if (fabs(qf_value) > qf_value_bound) 370 for (CeedInt n=0; n<num_nodes; n++) 371 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 372 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 373 } 374 } else { 375 // Diagonal Only 376 const CeedScalar qf_value = 377 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 378 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 379 if (fabs(qf_value) > qf_value_bound) 380 for (CeedInt n=0; n<num_nodes; n++) 381 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 382 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 383 } 384 } 385 } 386 } 387 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 388 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 389 
CeedChk(ierr); 390 391 // Assemble local operator diagonal 392 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 393 assembled, request); CeedChk(ierr); 394 395 // Cleanup 396 if (is_pointblock) { 397 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 398 } 399 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 400 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 401 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 402 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 403 ierr = CeedFree(&identity); CeedChk(ierr); 404 405 return CEED_ERROR_SUCCESS; 406 } 407 408 /** 409 @brief Core logic for assembling composite operator diagonal 410 411 @param[in] op CeedOperator to assemble point block diagonal 412 @param[in] request Address of CeedRequest for non-blocking completion, else 413 CEED_REQUEST_IMMEDIATE 414 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 415 @param[out] assembled CeedVector to store assembled diagonal 416 417 @return An error code: 0 - success, otherwise - failure 418 419 @ref Developer 420 **/ 421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 422 CeedOperator op, CeedRequest *request, const bool is_pointblock, 423 CeedVector assembled) { 424 int ierr; 425 CeedInt num_sub; 426 CeedOperator *suboperators; 427 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 428 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 429 for (CeedInt i = 0; i < num_sub; i++) { 430 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 431 is_pointblock, assembled); CeedChk(ierr); 432 } 433 return CEED_ERROR_SUCCESS; 434 } 435 436 /** 437 @brief Build nonzero pattern for non-composite operator 438 439 Users should generally use CeedOperatorLinearAssembleSymbolic() 440 441 @param[in] op CeedOperator to assemble nonzero pattern 442 @param[in] offset Offset for number of entries 443 @param[out] rows Row number for each entry 444 @param[out] cols 
Column number for each entry 445 446 @return An error code: 0 - success, otherwise - failure 447 448 @ref Developer 449 **/ 450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 451 CeedInt *rows, CeedInt *cols) { 452 int ierr; 453 Ceed ceed = op->ceed; 454 if (op->is_composite) 455 // LCOV_EXCL_START 456 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 457 "Composite operator not supported"); 458 // LCOV_EXCL_STOP 459 460 CeedElemRestriction rstr_in; 461 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 462 CeedSize num_nodes; 463 CeedInt num_elem, elem_size, num_comp; 464 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 465 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 466 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 467 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 468 CeedInt layout_er[3]; 469 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 470 471 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 472 473 // Determine elem_dof relation 474 CeedVector index_vec; 475 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 476 CeedScalar *array; 477 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 478 for (CeedInt i = 0; i < num_nodes; ++i) { 479 array[i] = i; 480 } 481 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 482 CeedVector elem_dof; 483 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 484 CeedChk(ierr); 485 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 486 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 487 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 488 const CeedScalar *elem_dof_a; 489 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 490 CeedChk(ierr); 491 ierr = 
CeedVectorDestroy(&index_vec); CeedChk(ierr); 492 493 // Determine i, j locations for element matrices 494 CeedInt count = 0; 495 for (int e = 0; e < num_elem; ++e) { 496 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 497 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 498 for (int i = 0; i < elem_size; ++i) { 499 for (int j = 0; j < elem_size; ++j) { 500 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 501 (comp_out)*layout_er[1] + e*layout_er[2]; 502 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 503 (comp_in)*layout_er[1] + e*layout_er[2]; 504 505 const CeedInt row = elem_dof_a[elem_dof_index_row]; 506 const CeedInt col = elem_dof_a[elem_dof_index_col]; 507 508 rows[offset + count] = row; 509 cols[offset + count] = col; 510 count++; 511 } 512 } 513 } 514 } 515 } 516 if (count != local_num_entries) 517 // LCOV_EXCL_START 518 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 519 // LCOV_EXCL_STOP 520 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 521 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 522 523 return CEED_ERROR_SUCCESS; 524 } 525 526 /** 527 @brief Assemble nonzero entries for non-composite operator 528 529 Users should generally use CeedOperatorLinearAssemble() 530 531 @param[in] op CeedOperator to assemble 532 @param[out] offset Offest for number of entries 533 @param[out] values Values to assemble into matrix 534 535 @return An error code: 0 - success, otherwise - failure 536 537 @ref Developer 538 **/ 539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 540 CeedVector values) { 541 int ierr; 542 Ceed ceed = op->ceed; 543 if (op->is_composite) 544 // LCOV_EXCL_START 545 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 546 "Composite operator not supported"); 547 // LCOV_EXCL_STOP 548 549 // Assemble QFunction 550 CeedQFunction qf; 551 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 552 CeedVector assembled_qf; 553 CeedElemRestriction 
rstr_q; 554 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 555 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 556 557 CeedSize qf_length; 558 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 559 560 CeedInt num_input_fields, num_output_fields; 561 CeedOperatorField *input_fields; 562 CeedOperatorField *output_fields; 563 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 564 &num_output_fields, &output_fields); CeedChk(ierr); 565 566 // Determine active input basis 567 CeedQFunctionField *qf_fields; 568 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 569 CeedInt num_eval_mode_in = 0, dim = 1; 570 CeedEvalMode *eval_mode_in = NULL; 571 CeedBasis basis_in = NULL; 572 CeedElemRestriction rstr_in = NULL; 573 for (CeedInt i=0; i<num_input_fields; i++) { 574 CeedVector vec; 575 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 576 if (vec == CEED_VECTOR_ACTIVE) { 577 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 578 CeedChk(ierr); 579 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 580 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 581 CeedChk(ierr); 582 CeedEvalMode eval_mode; 583 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 584 CeedChk(ierr); 585 switch (eval_mode) { 586 case CEED_EVAL_NONE: 587 case CEED_EVAL_INTERP: 588 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 589 eval_mode_in[num_eval_mode_in] = eval_mode; 590 num_eval_mode_in += 1; 591 break; 592 case CEED_EVAL_GRAD: 593 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 594 for (CeedInt d=0; d<dim; d++) { 595 eval_mode_in[num_eval_mode_in+d] = eval_mode; 596 } 597 num_eval_mode_in += dim; 598 break; 599 case CEED_EVAL_WEIGHT: 600 case CEED_EVAL_DIV: 601 case CEED_EVAL_CURL: 602 break; // Caught by QF Assembly 603 } 604 } 605 } 606 607 // Determine active output basis 608 
ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 609 CeedInt num_eval_mode_out = 0; 610 CeedEvalMode *eval_mode_out = NULL; 611 CeedBasis basis_out = NULL; 612 CeedElemRestriction rstr_out = NULL; 613 for (CeedInt i=0; i<num_output_fields; i++) { 614 CeedVector vec; 615 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 616 if (vec == CEED_VECTOR_ACTIVE) { 617 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 618 CeedChk(ierr); 619 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 620 CeedChk(ierr); 621 CeedEvalMode eval_mode; 622 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 623 CeedChk(ierr); 624 switch (eval_mode) { 625 case CEED_EVAL_NONE: 626 case CEED_EVAL_INTERP: 627 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 628 eval_mode_out[num_eval_mode_out] = eval_mode; 629 num_eval_mode_out += 1; 630 break; 631 case CEED_EVAL_GRAD: 632 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 633 for (CeedInt d=0; d<dim; d++) { 634 eval_mode_out[num_eval_mode_out+d] = eval_mode; 635 } 636 num_eval_mode_out += dim; 637 break; 638 case CEED_EVAL_WEIGHT: 639 case CEED_EVAL_DIV: 640 case CEED_EVAL_CURL: 641 break; // Caught by QF Assembly 642 } 643 } 644 } 645 646 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 647 // LCOV_EXCL_START 648 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 649 "Cannot assemble operator with out inputs/outputs"); 650 // LCOV_EXCL_STOP 651 652 CeedInt num_elem, elem_size, num_qpts, num_comp; 653 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 654 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 655 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 656 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 657 658 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * 
num_elem; 659 660 // loop over elements and put in data structure 661 const CeedScalar *interp_in, *grad_in; 662 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 663 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 664 665 const CeedScalar *assembled_qf_array; 666 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 667 CeedChk(ierr); 668 669 CeedInt layout_qf[3]; 670 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 671 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 672 673 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 674 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 675 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 676 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 677 num_qpts]; // logically 3-tensor 678 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 679 CeedScalar elem_mat[elem_size * elem_size]; 680 int count = 0; 681 CeedScalar *vals; 682 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 683 for (int e = 0; e < num_elem; ++e) { 684 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 685 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 686 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 687 B_mat_in[ell] = 0.0; 688 } 689 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 690 B_mat_out[ell] = 0.0; 691 } 692 // Store block-diagonal D matrix as collection of small dense blocks 693 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 694 D_mat[ell] = 0.0; 695 } 696 // form element matrix itself (for each block component) 697 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 698 elem_mat[ell] = 0.0; 699 } 700 for (int q = 0; q < num_qpts; ++q) { 701 for (int n = 0; n < elem_size; ++n) { 702 CeedInt d_in = -1; 703 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 704 const int qq 
= num_eval_mode_in*q; 705 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 706 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 707 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 708 d_in += 1; 709 B_mat_in[(qq+e_in)*elem_size + n] += 710 grad_in[(d_in*num_qpts+q) * elem_size + n]; 711 } else { 712 // LCOV_EXCL_START 713 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 714 // LCOV_EXCL_STOP 715 } 716 } 717 CeedInt d_out = -1; 718 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 719 const int qq = num_eval_mode_out*q; 720 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 721 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 722 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 723 d_out += 1; 724 B_mat_out[(qq+e_out)*elem_size + n] += 725 grad_in[(d_out*num_qpts+q) * elem_size + n]; 726 } else { 727 // LCOV_EXCL_START 728 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 729 // LCOV_EXCL_STOP 730 } 731 } 732 } 733 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 734 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 735 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 736 +comp_out; 737 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 738 e*layout_qf[2]; 739 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 740 } 741 } 742 } 743 // Compute B^T*D 744 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 745 BTD[ell] = 0.0; 746 } 747 for (int j = 0; j<elem_size; ++j) { 748 for (int q = 0; q<num_qpts; ++q) { 749 int qq = num_eval_mode_out*q; 750 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 751 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 752 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 753 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 754 } 755 } 756 } 757 } 758 759 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 760 elem_size, 
num_qpts*num_eval_mode_in); CeedChk(ierr); 761 762 // put element matrix in coordinate data structure 763 for (int i = 0; i < elem_size; ++i) { 764 for (int j = 0; j < elem_size; ++j) { 765 vals[offset + count] = elem_mat[i*elem_size + j]; 766 count++; 767 } 768 } 769 } 770 } 771 } 772 if (count != local_num_entries) 773 // LCOV_EXCL_START 774 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 775 // LCOV_EXCL_STOP 776 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 777 778 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 779 CeedChk(ierr); 780 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 781 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 782 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 783 784 return CEED_ERROR_SUCCESS; 785 } 786 787 /** 788 @brief Count number of entries for assembled CeedOperator 789 790 @param[in] op CeedOperator to assemble 791 @param[out] num_entries Number of entries in assembled representation 792 793 @return An error code: 0 - success, otherwise - failure 794 795 @ref Utility 796 **/ 797 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 798 CeedInt *num_entries) { 799 int ierr; 800 CeedElemRestriction rstr; 801 CeedInt num_elem, elem_size, num_comp; 802 803 if (op->is_composite) 804 // LCOV_EXCL_START 805 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 806 "Composite operator not supported"); 807 // LCOV_EXCL_STOP 808 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 809 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 810 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 811 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 812 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 813 814 return CEED_ERROR_SUCCESS; 815 } 816 817 /** 818 @brief Common code for creating a multigrid coarse operator and level 819 transfer operators for a CeedOperator 

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] basis_c_to_f  Basis for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields; the active field is remapped to the coarse
  //    restriction/basis, all passive fields are shared with the fine operator
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }

  // Multiplicity vector
  // mult_vec ends up holding 1/multiplicity at each fine L-vector node,
  // used to undo the double counting from gather/scatter in the transfers
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction (fine to coarse): scale by 1/multiplicity, then apply the
  // transpose of the coarse-to-fine basis via the 'output' field
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  // CEED_OWN_POINTER: the context takes ownership of num_comp_r_data
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation (coarse to fine): interpolate with basis_c_to_f, then
  // scale by 1/multiplicity on the fine grid
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup (the operators above hold their own references)
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in] interp_1d    Interpolation matrix in one dimension
  @param[in] grad_1d      Gradient matrix in one dimension
  @param[in] q_weight_1d  Quadrature weights in one dimension
  @param[in] P_1d         Number of basis nodes in one dimension
  @param[in] Q_1d         Number of quadrature points in one dimension
  @param[in] dim          Dimension of basis
  @param[out] mass        Assembled mass matrix in one dimension
  @param[out] laplace     Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass matrix, M_ij = sum_k B_ki w_k B_kj
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian, L_ij = sum_k D_ki w_k D_kj
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Diagonal shift keeps the (singular) Laplacian safely invertible
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
    For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
    'v', provided in that order, would result in an assembled QFunction that
    consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
    on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1042 1043 @param op CeedOperator to assemble CeedQFunction 1044 @param[out] assembled CeedVector to store assembled CeedQFunction at 1045 quadrature points 1046 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1047 CeedQFunction 1048 @param request Address of CeedRequest for non-blocking completion, else 1049 @ref CEED_REQUEST_IMMEDIATE 1050 1051 @return An error code: 0 - success, otherwise - failure 1052 1053 @ref User 1054 **/ 1055 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1056 CeedElemRestriction *rstr, 1057 CeedRequest *request) { 1058 int ierr; 1059 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1060 1061 // Backend version 1062 if (op->LinearAssembleQFunction) { 1063 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1064 CeedChk(ierr); 1065 } else { 1066 // Fallback to reference Ceed 1067 if (!op->op_fallback) { 1068 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1069 } 1070 // Assemble 1071 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1072 rstr, request); CeedChk(ierr); 1073 } 1074 return CEED_ERROR_SUCCESS; 1075 } 1076 1077 /** 1078 @brief Assemble CeedQFunction and store result internall. Return copied 1079 references of stored data to the caller. Caller is responsible for 1080 ownership and destruction of the copied references. 
See also
           @ref CeedOperatorLinearAssembleQFunction

  @param op           CeedOperator to assemble CeedQFunction
  @param assembled    CeedVector to store assembled CeedQFunction at
                        quadrature points
  @param rstr         CeedElemRestriction for CeedVector containing assembled
                        CeedQFunction
  @param request      Address of CeedRequest for non-blocking completion, else
                        @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    // First call builds and caches the assembled data; later calls only
    // update the cached vector in place
    if (op->has_qf_assembled) {
      ierr = op->LinearAssembleQFunctionUpdate(op, op->qf_assembled,
                                               op->qf_assembled_rstr, request);
    } else {
      ierr = op->LinearAssembleQFunction(op, &op->qf_assembled,
                                         &op->qf_assembled_rstr, request);
    }
    CeedChk(ierr);
    op->has_qf_assembled = true;
    // Copy reference to internally held copy; caller owns and destroys
    // the returned references
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(op->qf_assembled, assembled); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(op->qf_assembled_rstr, rstr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }
  // Catches the un-checked ReferenceCopy error from the backend branch
  CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Backend provides only the additive variant; zero first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
    CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    // false: assemble the scalar diagonal, not point blocks
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
1268 @param request Address of CeedRequest for non-blocking completion, else 1269 CEED_REQUEST_IMMEDIATE 1270 1271 @return An error code: 0 - success, otherwise - failure 1272 1273 @ref User 1274 **/ 1275 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, 1276 CeedVector assembled, CeedRequest *request) { 1277 int ierr; 1278 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1279 1280 // Use backend version, if available 1281 if (op->LinearAssemblePointBlockDiagonal) { 1282 ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); 1283 CeedChk(ierr); 1284 return CEED_ERROR_SUCCESS; 1285 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1286 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1287 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, 1288 request); CeedChk(ierr); 1289 return CEED_ERROR_SUCCESS; 1290 } else { 1291 // Check for valid fallback resource 1292 const char *resource, *fallback_resource; 1293 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1294 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1295 CeedChk(ierr); 1296 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1297 // Fallback to reference Ceed 1298 if (!op->op_fallback) { 1299 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1300 } 1301 // Assemble 1302 ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback, 1303 assembled, request); CeedChk(ierr); 1304 return CEED_ERROR_SUCCESS; 1305 } 1306 } 1307 1308 // Default interface implementation 1309 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1310 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); 1311 CeedChk(ierr); 1312 return CEED_ERROR_SUCCESS; 1313 } 1314 1315 /** 1316 @brief Assemble the point block diagonal of a square linear CeedOperator 1317 1318 This sums into a CeedVector with the point block diagonal of a linear 1319 CeedOperator. 

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
  @param request        Address of CeedRequest for non-blocking completion, else
                          CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    // true: assemble num_comp x num_comp point blocks, not the scalar diagonal
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op           CeedOperator to assemble
  @param[out] num_entries Number of entries in coordinate nonzero pattern
  @param[out] rows        Row number for each entry
  @param[out] cols        Column number for each entry

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations; each sub-operator writes its COO pattern
  // at its running offset into the shared rows/cols arrays
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1494 1495 @param[in] op CeedOperator to assemble 1496 @param[out] values Values to assemble into matrix 1497 1498 @ref User 1499 **/ 1500 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1501 int ierr; 1502 CeedInt num_suboperators, single_entries = 0; 1503 CeedOperator *sub_operators; 1504 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1505 1506 // Use backend version, if available 1507 if (op->LinearAssemble) { 1508 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1509 return CEED_ERROR_SUCCESS; 1510 } else { 1511 // Check for valid fallback resource 1512 const char *resource, *fallback_resource; 1513 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1514 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1515 CeedChk(ierr); 1516 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1517 // Fallback to reference Ceed 1518 if (!op->op_fallback) { 1519 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1520 } 1521 // Assemble 1522 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1523 return CEED_ERROR_SUCCESS; 1524 } 1525 } 1526 1527 // Default interface implementation 1528 bool is_composite; 1529 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1530 1531 CeedInt offset = 0; 1532 if (is_composite) { 1533 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1534 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1535 for (int k = 0; k < num_suboperators; ++k) { 1536 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1537 CeedChk(ierr); 1538 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1539 &single_entries); 1540 CeedChk(ierr); 1541 offset += single_entries; 1542 } 1543 } else { 1544 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1545 } 1546 1547 return CEED_ERROR_SUCCESS; 1548 } 1549 1550 /** 1551 @brief Create a multigrid coarse operator and level 
transfer operators 1552 for a CeedOperator, creating the prolongation basis from the 1553 fine and coarse grid interpolation 1554 1555 Note: Calling this function asserts that setup is complete 1556 and sets the CeedOperator as immutable. 1557 1558 @param[in] op_fine Fine grid operator 1559 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1560 @param[in] rstr_coarse Coarse grid restriction 1561 @param[in] basis_coarse Coarse grid active vector basis 1562 @param[out] op_coarse Coarse grid operator 1563 @param[out] op_prolong Coarse to fine operator 1564 @param[out] op_restrict Fine to coarse operator 1565 1566 @return An error code: 0 - success, otherwise - failure 1567 1568 @ref User 1569 **/ 1570 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1571 CeedVector p_mult_fine, 1572 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1573 CeedOperator *op_coarse, CeedOperator *op_prolong, 1574 CeedOperator *op_restrict) { 1575 int ierr; 1576 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1577 Ceed ceed; 1578 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1579 1580 // Check for compatible quadrature spaces 1581 CeedBasis basis_fine; 1582 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1583 CeedInt Q_f, Q_c; 1584 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1585 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1586 if (Q_f != Q_c) 1587 // LCOV_EXCL_START 1588 return CeedError(ceed, CEED_ERROR_DIMENSION, 1589 "Bases must have compatible quadrature spaces"); 1590 // LCOV_EXCL_STOP 1591 1592 // Coarse to fine basis 1593 CeedInt P_f, P_c, Q = Q_f; 1594 bool is_tensor_f, is_tensor_c; 1595 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1596 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1597 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1598 if (is_tensor_f && is_tensor_c) { 1599 ierr = 
CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1600 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1601 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1602 } else if (!is_tensor_f && !is_tensor_c) { 1603 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1604 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1605 } else { 1606 // LCOV_EXCL_START 1607 return CeedError(ceed, CEED_ERROR_MINOR, 1608 "Bases must both be tensor or non-tensor"); 1609 // LCOV_EXCL_STOP 1610 } 1611 1612 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1613 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1614 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1615 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1616 if (is_tensor_f) { 1617 memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]); 1618 memcpy(interp_c, basis_coarse->interp_1d, 1619 Q*P_c*sizeof basis_coarse->interp_1d[0]); 1620 } else { 1621 memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]); 1622 memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]); 1623 } 1624 1625 // -- QR Factorization, interp_f = Q R 1626 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1627 1628 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1629 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1630 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1631 1632 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1633 for (CeedInt j=0; j<P_c; j++) { // Column j 1634 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1635 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1636 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1637 for (CeedInt k=i+1; k<P_f; k++) 1638 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1639 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1640 } 1641 } 1642 ierr = CeedFree(&tau); CeedChk(ierr); 1643 ierr = 
CeedFree(&interp_c); CeedChk(ierr);
ierr = CeedFree(&interp_f); CeedChk(ierr);

// Complete with interp_c_to_f versions of code
// Dispatch to the tensor or non-tensor level constructor now that the
//   coarse-to-fine interpolation matrix has been computed
if (is_tensor_f) {
  ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
         rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
  CeedChk(ierr);
} else {
  ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
         rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
  CeedChk(ierr);
}

// Cleanup
ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
    CeedOperator *op_prolong, CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  // The coarse and fine bases must share the quadrature space so the
  //   fine grid QFunction data can be reused on the coarse level
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  // Infer the coarse 1D node count from the coarse element size
  //   NOTE(review): sqrt/cbrt results are truncated on assignment to CeedInt;
  //   this assumes an exact integer root - confirm for large element sizes
  //   where floating point roots may land just below the integer value
  P_1d_c = dim == 1 ? num_nodes_c :
           dim == 2 ? sqrt(num_nodes_c) :
           cbrt(num_nodes_c);
  // q_ref, q_weight, and grad are zero-filled placeholders; only the
  //   interpolation matrix is meaningful for the coarse-to-fine basis
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
                                 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a non-tensor basis for the active vector

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
                                       CeedVector p_mult_fine,
                                       CeedElemRestriction rstr_coarse,
                                       CeedBasis basis_coarse,
                                       const CeedScalar *interp_c_to_f,
                                       CeedOperator *op_coarse,
                                       CeedOperator *op_prolong,
                                       CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  // The coarse and fine bases must share the quadrature space so the
  //   fine grid QFunction data can be reused on the coarse level
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // The coarse-to-fine basis reuses the fine grid topology; q_ref,
  //   q_weight, and grad are zero-filled placeholders since only the
  //   interpolation matrix is used for prolongation/restriction
  CeedElemTopology topo;
  ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
  CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
                           interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build a FDM based approximate inverse for each element for a
           CeedOperator

  This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
    Method based approximate inverse. This function obtains the simultaneous
    diagonalization for the 1D mass and Laplacian operators,
      M = V^T V, K = V^T S V.
    The assembled QFunction is used to modify the eigenvalues from simultaneous
    diagonalization and obtain an approximate inverse of the form
      V^T S^hat V. The CeedOperator must be linear and non-composite. The
    associated CeedQFunction must therefore also be linear.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1822 1823 @param op CeedOperator to create element inverses 1824 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 1825 for each element 1826 @param request Address of CeedRequest for non-blocking completion, else 1827 @ref CEED_REQUEST_IMMEDIATE 1828 1829 @return An error code: 0 - success, otherwise - failure 1830 1831 @ref Backend 1832 **/ 1833 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 1834 CeedRequest *request) { 1835 int ierr; 1836 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1837 1838 // Use backend version, if available 1839 if (op->CreateFDMElementInverse) { 1840 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 1841 return CEED_ERROR_SUCCESS; 1842 } else { 1843 // Check for valid fallback resource 1844 const char *resource, *fallback_resource; 1845 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1846 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1847 CeedChk(ierr); 1848 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1849 // Fallback to reference Ceed 1850 if (!op->op_fallback) { 1851 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1852 } 1853 // Assemble 1854 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 1855 CeedChk(ierr); 1856 return CEED_ERROR_SUCCESS; 1857 } 1858 } 1859 1860 // Interface implementation 1861 Ceed ceed, ceed_parent; 1862 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1863 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 1864 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 1865 CeedQFunction qf; 1866 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 1867 1868 // Determine active input basis 1869 bool interp = false, grad = false; 1870 CeedBasis basis = NULL; 1871 CeedElemRestriction rstr = NULL; 1872 CeedOperatorField *op_fields; 1873 CeedQFunctionField *qf_fields; 1874 CeedInt num_input_fields; 1875 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 1876 CeedChk(ierr); 1877 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 1878 for (CeedInt i=0; i<num_input_fields; i++) { 1879 CeedVector vec; 1880 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 1881 if (vec == CEED_VECTOR_ACTIVE) { 1882 CeedEvalMode eval_mode; 1883 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 1884 interp = interp || eval_mode == CEED_EVAL_INTERP; 1885 grad = grad || eval_mode == CEED_EVAL_GRAD; 1886 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 1887 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 1888 } 1889 } 1890 if (!basis) 1891 // LCOV_EXCL_START 1892 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 1893 // LCOV_EXCL_STOP 1894 CeedSize l_size = 1; 1895 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 1896 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 1897 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 1898 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 1899 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 1900 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 1901 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 1902 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 1903 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 1904 1905 // Build and diagonalize 1D Mass and Laplacian 1906 
bool tensor_basis; 1907 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 1908 if (!tensor_basis) 1909 // LCOV_EXCL_START 1910 return CeedError(ceed, CEED_ERROR_BACKEND, 1911 "FDMElementInverse only supported for tensor " 1912 "bases"); 1913 // LCOV_EXCL_STOP 1914 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 1915 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 1916 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 1917 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 1918 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 1919 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 1920 // -- Build matrices 1921 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 1922 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 1923 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 1924 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 1925 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 1926 mass, laplace); CeedChk(ierr); 1927 1928 // -- Diagonalize 1929 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 1930 CeedChk(ierr); 1931 ierr = CeedFree(&mass); CeedChk(ierr); 1932 ierr = CeedFree(&laplace); CeedChk(ierr); 1933 for (CeedInt i=0; i<P_1d; i++) 1934 for (CeedInt j=0; j<P_1d; j++) 1935 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 1936 ierr = CeedFree(&x); CeedChk(ierr); 1937 1938 // Assemble QFunction 1939 CeedVector assembled; 1940 CeedElemRestriction rstr_qf; 1941 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 1942 &rstr_qf, request); CeedChk(ierr); 1943 CeedInt layout[3]; 1944 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 1945 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 1946 CeedScalar max_norm = 0; 1947 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 1948 1949 // Calculate element averages 1950 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 1951 CeedScalar 
*elem_avg; 1952 const CeedScalar *assembled_array, *q_weight_array; 1953 CeedVector q_weight; 1954 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 1955 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 1956 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 1957 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 1958 CeedChk(ierr); 1959 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 1960 CeedChk(ierr); 1961 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 1962 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 1963 for (CeedInt e=0; e<num_elem; e++) { 1964 CeedInt count = 0; 1965 for (CeedInt q=0; q<num_qpts; q++) 1966 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 1967 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 1968 qf_value_bound) { 1969 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 1970 q_weight_array[q]; 1971 count++; 1972 } 1973 if (count) { 1974 elem_avg[e] /= count; 1975 } else { 1976 elem_avg[e] = 1.0; 1977 } 1978 } 1979 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 1980 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 1981 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 1982 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 1983 1984 // Build FDM diagonal 1985 CeedVector q_data; 1986 CeedScalar *q_data_array, *fdm_diagonal; 1987 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 1988 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 1989 for (CeedInt c=0; c<num_comp; c++) 1990 for (CeedInt n=0; n<elem_size; n++) { 1991 if (interp) 1992 fdm_diagonal[c*elem_size + n] = 1.0; 1993 if (grad) 1994 for (CeedInt d=0; d<dim; d++) { 1995 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 1996 fdm_diagonal[c*elem_size + n] += lambda[i]; 1997 } 1998 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 1999 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2000 } 2001 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2002 CeedChk(ierr); 2003 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2004 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2005 CeedChk(ierr); 2006 for (CeedInt e=0; e<num_elem; e++) 2007 for (CeedInt c=0; c<num_comp; c++) 2008 for (CeedInt n=0; n<elem_size; n++) 2009 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2010 fdm_diagonal[c*elem_size + n]); 2011 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2012 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2013 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2014 2015 // Setup FDM operator 2016 // -- Basis 2017 CeedBasis fdm_basis; 2018 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2019 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2020 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2021 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2022 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2023 fdm_interp, grad_dummy, q_ref_dummy, 2024 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2025 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2026 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2027 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2028 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2029 ierr = CeedFree(&lambda); CeedChk(ierr); 2030 2031 // -- Restriction 2032 CeedElemRestriction rstr_qd_i; 2033 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2034 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2035 num_comp, num_elem*num_comp*elem_size, 2036 strides, &rstr_qd_i); CeedChk(ierr); 2037 // -- QFunction 2038 CeedQFunction qf_fdm; 2039 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2040 CeedChk(ierr); 2041 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2042 
CeedChk(ierr); 2043 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2044 CeedChk(ierr); 2045 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2046 CeedChk(ierr); 2047 // -- QFunction context 2048 CeedInt *num_comp_data; 2049 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2050 num_comp_data[0] = num_comp; 2051 CeedQFunctionContext ctx_fdm; 2052 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2053 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2054 sizeof(*num_comp_data), num_comp_data); 2055 CeedChk(ierr); 2056 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2057 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2058 // -- Operator 2059 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2060 CeedChk(ierr); 2061 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2062 CeedChk(ierr); 2063 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2064 q_data); CeedChk(ierr); 2065 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2066 CeedChk(ierr); 2067 2068 // Cleanup 2069 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2070 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2071 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2072 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2073 2074 return CEED_ERROR_SUCCESS; 2075 } 2076 2077 /// @} 2078