1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 op->op_fallback = op_ref; 
79 80 // Clone QF 81 CeedQFunction qf_ref; 82 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 83 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 84 qf_ref->data = NULL; 85 qf_ref->ceed = ceed_ref; 86 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 87 op_ref->qf = qf_ref; 88 op->qf_fallback = qf_ref; 89 return CEED_ERROR_SUCCESS; 90 } 91 92 /** 93 @brief Select correct basis matrix pointer based on CeedEvalMode 94 95 @param[in] eval_mode Current basis evaluation mode 96 @param[in] identity Pointer to identity matrix 97 @param[in] interp Pointer to interpolation matrix 98 @param[in] grad Pointer to gradient matrix 99 @param[out] basis_ptr Basis pointer to set 100 101 @ref Developer 102 **/ 103 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 104 const CeedScalar *identity, const CeedScalar *interp, 105 const CeedScalar *grad, const CeedScalar **basis_ptr) { 106 switch (eval_mode) { 107 case CEED_EVAL_NONE: 108 *basis_ptr = identity; 109 break; 110 case CEED_EVAL_INTERP: 111 *basis_ptr = interp; 112 break; 113 case CEED_EVAL_GRAD: 114 *basis_ptr = grad; 115 break; 116 case CEED_EVAL_WEIGHT: 117 case CEED_EVAL_DIV: 118 case CEED_EVAL_CURL: 119 break; // Caught by QF Assembly 120 } 121 } 122 123 /** 124 @brief Create point block restriction for active operator field 125 126 @param[in] rstr Original CeedElemRestriction for active field 127 @param[out] pointblock_rstr Address of the variable where the newly created 128 CeedElemRestriction will be stored 129 130 @return An error code: 0 - success, otherwise - failure 131 132 @ref Developer 133 **/ 134 static int CeedOperatorCreateActivePointBlockRestriction( 135 CeedElemRestriction rstr, 136 CeedElemRestriction *pointblock_rstr) { 137 int ierr; 138 Ceed ceed; 139 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 140 const CeedInt *offsets; 141 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 142 CeedChk(ierr); 143 144 // Expand offsets 145 CeedInt num_elem, num_comp, 
elem_size, comp_stride, max = 1,
          *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // Scale each node offset so it indexes the start of a num_comp x num_comp
  // point block; a non-unit comp_stride needs an extra factor of num_comp
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction; new restriction owns pointblock_offsets
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction: gives the pointwise D block of B^T D B at each
  // quadrature point, with an E-layout described by `layout`
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf,
         &rstr, request); CeedChk(ierr);
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm used below to drop numerically-zero QFunction values
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis; GRAD contributes `dim` eval modes, so
  // eval_mode_in is expanded accordingly
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction for this assembly
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // One entry per dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis (same expansion as for inputs)
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector (element-local; scattered to `assembled` below)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array;
  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices; an identity block stands in for CEED_EVAL_NONE
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Values below this bound are treated as numerically zero and skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // GRAD modes are expanded per-dimension; d_out tracks the dimension
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal: keep all c_in x c_out couplings
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only: same component in and out
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                  num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);

  // Assemble local operator diagonal: scatter-add element diagonals
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling composite operator diagonal

  Sums the (point block) diagonals of all suboperators into `assembled`.

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
  CeedOperator op, CeedRequest *request, const bool is_pointblock,
  CeedVector assembled) {
  int ierr;
  CeedInt num_sub;
  CeedOperator *suboperators;
  ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
  ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
  for (CeedInt i = 0; i < num_sub; i++) {
    ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
           is_pointblock, assembled); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build nonzero pattern for non-composite operator

  Users should generally use CeedOperatorLinearAssembleSymbolic()

  @param[in] op      CeedOperator to assemble nonzero pattern
  @param[in] offset  Offset for number of entries
  @param[out] rows   Row number for each entry
  @param[out] cols
Column number for each entry 445 446 @return An error code: 0 - success, otherwise - failure 447 448 @ref Developer 449 **/ 450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 451 CeedInt *rows, CeedInt *cols) { 452 int ierr; 453 Ceed ceed = op->ceed; 454 if (op->is_composite) 455 // LCOV_EXCL_START 456 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 457 "Composite operator not supported"); 458 // LCOV_EXCL_STOP 459 460 CeedElemRestriction rstr_in; 461 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 462 CeedInt num_elem, elem_size, num_nodes, num_comp; 463 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 464 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 465 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 466 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 467 CeedInt layout_er[3]; 468 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 469 470 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 471 472 // Determine elem_dof relation 473 CeedVector index_vec; 474 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 475 CeedScalar *array; 476 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 477 for (CeedInt i = 0; i < num_nodes; ++i) { 478 array[i] = i; 479 } 480 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 481 CeedVector elem_dof; 482 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 483 CeedChk(ierr); 484 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 485 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 486 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 487 const CeedScalar *elem_dof_a; 488 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 489 CeedChk(ierr); 490 ierr = CeedVectorDestroy(&index_vec); 
CeedChk(ierr); 491 492 // Determine i, j locations for element matrices 493 CeedInt count = 0; 494 for (int e = 0; e < num_elem; ++e) { 495 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 496 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 497 for (int i = 0; i < elem_size; ++i) { 498 for (int j = 0; j < elem_size; ++j) { 499 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 500 (comp_out)*layout_er[1] + e*layout_er[2]; 501 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 502 (comp_in)*layout_er[1] + e*layout_er[2]; 503 504 const CeedInt row = elem_dof_a[elem_dof_index_row]; 505 const CeedInt col = elem_dof_a[elem_dof_index_col]; 506 507 rows[offset + count] = row; 508 cols[offset + count] = col; 509 count++; 510 } 511 } 512 } 513 } 514 } 515 if (count != local_num_entries) 516 // LCOV_EXCL_START 517 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 // LCOV_EXCL_STOP 519 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 520 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 521 522 return CEED_ERROR_SUCCESS; 523 } 524 525 /** 526 @brief Assemble nonzero entries for non-composite operator 527 528 Users should generally use CeedOperatorLinearAssemble() 529 530 @param[in] op CeedOperator to assemble 531 @param[out] offset Offest for number of entries 532 @param[out] values Values to assemble into matrix 533 534 @return An error code: 0 - success, otherwise - failure 535 536 @ref Developer 537 **/ 538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 539 CeedVector values) { 540 int ierr; 541 Ceed ceed = op->ceed; 542 if (op->is_composite) 543 // LCOV_EXCL_START 544 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 545 "Composite operator not supported"); 546 // LCOV_EXCL_STOP 547 548 // Assemble QFunction 549 CeedQFunction qf; 550 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 551 CeedVector assembled_qf; 552 CeedElemRestriction rstr_q; 553 ierr = 
CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
         op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);

  CeedInt qf_length;
  ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);

  CeedInt num_input_fields, num_output_fields;
  CeedOperatorField *input_fields;
  CeedOperatorField *output_fields;
  ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
                               &num_output_fields, &output_fields); CeedChk(ierr);

  // Determine active input basis; GRAD expands to `dim` eval modes
  CeedQFunctionField *qf_fields;
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
      CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // One entry per dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        }
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis (same expansion as for inputs)
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
      CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        }
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Cannot assemble operator with out inputs/outputs");
  // LCOV_EXCL_STOP

  CeedInt num_elem, elem_size, num_qpts, num_comp;
  ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);

  CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  // loop over elements and put in data structure
  // NOTE(review): only the input basis matrices are fetched; B_mat_out below
  // is also built from interp_in/grad_in, which assumes the input and output
  // bases coincide — confirm for non-square operators
  const CeedScalar *interp_in, *grad_in;
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);

  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);

  CeedInt layout_qf[3];
  ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);

  // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
  CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
  CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
  CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
                                     num_qpts]; // logically 3-tensor
  CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
  CeedScalar elem_mat[elem_size * elem_size];
  int count = 0;
  CeedScalar *vals;
  ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
  for (int e = 0; e < num_elem; ++e) {
    for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
      for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
        // Reset scratch matrices for this component pair
        for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
          B_mat_in[ell] = 0.0;
        }
        for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
          B_mat_out[ell] = 0.0;
        }
        // Store block-diagonal D matrix as collection of small dense blocks
        for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
          D_mat[ell] = 0.0;
        }
        // form element matrix itself (for each block component)
        for (int ell = 0; ell < elem_size*elem_size; ++ell) {
          elem_mat[ell] = 0.0;
        }
        // Build B matrices (rows are qpoint x eval-mode, columns are nodes)
        // and gather the D blocks for this component pair
        for (int q = 0; q < num_qpts; ++q) {
          for (int n = 0; n < elem_size; ++n) {
            CeedInt d_in = -1;
            for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
              const int qq = num_eval_mode_in*q;
              if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
                B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
                d_in += 1;
                B_mat_in[(qq+e_in)*elem_size + n] +=
                  grad_in[(d_in*num_qpts+q) * elem_size + n];
              } else {
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
            CeedInt d_out = -1;
            for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
              const int qq = num_eval_mode_out*q;
              if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
                B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
                d_out += 1;
                B_mat_out[(qq+e_out)*elem_size + n] +=
                  grad_in[(d_out*num_qpts+q) * elem_size + n];
              } else {
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
          }
          for (int ei = 0; ei < num_eval_mode_out; ++ei) {
            for (int ej = 0; ej < num_eval_mode_in; ++ej) {
              const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
                                          +comp_out;
              const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
                                e*layout_qf[2];
              D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
            }
          }
        }
        // Compute B^T*D
        for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
          BTD[ell] = 0.0;
        }
        for (int j = 0; j<elem_size; ++j) {
          for (int q = 0; q<num_qpts; ++q) {
            int qq = num_eval_mode_out*q;
            for (int ei = 0; ei < num_eval_mode_in; ++ei) {
              for (int ej = 0; ej < num_eval_mode_out; ++ej) {
                BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
                  B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
              }
            }
          }
        }

        // elem_mat = (B^T D) * B
        ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
                                  elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);

        // put element matrix in coordinate data structure
        for (int i = 0; i < elem_size; ++i) {
          for (int j = 0; j < elem_size; ++j) {
            vals[offset + count] = elem_mat[i*elem_size + j];
            count++;
          }
        }
      }
    }
  }
  if (count != local_num_entries)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
  // LCOV_EXCL_STOP
  ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);

  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Count number of entries for assembled CeedOperator

  @param[in] op            CeedOperator to assemble
  @param[out] num_entries  Number of entries in assembled representation

  @return An error code: 0 - success, otherwise - failure

  @ref Utility
**/
static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
    CeedInt *num_entries) {
  int ierr;
  CeedElemRestriction rstr;
  CeedInt num_elem, elem_size, num_comp;

  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP
  ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  // One dense elem_size*num_comp square block per element
  *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Common code for creating a multigrid coarse operator and level
           transfer operators for a CeedOperator

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] basis_c_to_f  Basis for coarse to fine interpolation

  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields; the active input swaps to the coarse
  //    restriction/basis, all passive fields are shared with the fine operator
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }

  // Multiplicity vector
  // Restrict then scatter p_mult_fine, then take the reciprocal so mult_vec
  //   holds 1/multiplicity at each fine node (used by the "Scale" QFunctions)
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction
  // "Scale" QFunction context carries num_comp; context takes ownership of
  //   num_comp_r_data (CEED_OWN_POINTER)
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup
  // Note: this routine takes ownership of basis_c_to_f and destroys it here
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in] interp_1d   Interpolation matrix in one dimension
  @param[in] grad_1d     Gradient matrix in one dimension
  @param[in] q_weight_1d Quadrature weights in one dimension
  @param[in] P_1d        Number of basis nodes in one dimension
  @param[in] Q_1d        Number of quadrature points in one dimension
  @param[in] dim         Dimension of basis
  @param[out] mass       Assembled mass matrix in one dimension
  @param[out] laplace    Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass: mass = interp_1d^T diag(q_weight_1d) interp_1d
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian: laplace = grad_1d^T diag(q_weight_1d) grad_1d
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Small dimension-dependent diagonal shift; presumably keeps the singular
  //   1D Laplacian invertible downstream -- TODO confirm intended use
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
    For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
    'v', provided in that order, would result in an assembled QFunction that
    consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
    on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1041 1042 @param op CeedOperator to assemble CeedQFunction 1043 @param[out] assembled CeedVector to store assembled CeedQFunction at 1044 quadrature points 1045 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1046 CeedQFunction 1047 @param request Address of CeedRequest for non-blocking completion, else 1048 @ref CEED_REQUEST_IMMEDIATE 1049 1050 @return An error code: 0 - success, otherwise - failure 1051 1052 @ref User 1053 **/ 1054 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1055 CeedElemRestriction *rstr, 1056 CeedRequest *request) { 1057 int ierr; 1058 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1059 1060 // Backend version 1061 if (op->LinearAssembleQFunction) { 1062 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1063 CeedChk(ierr); 1064 } else { 1065 // Fallback to reference Ceed 1066 if (!op->op_fallback) { 1067 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1068 } 1069 // Assemble 1070 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1071 rstr, request); CeedChk(ierr); 1072 } 1073 return CEED_ERROR_SUCCESS; 1074 } 1075 1076 /** 1077 @brief Assemble CeedQFunction and store result internall. Return copied 1078 references of stored data to the caller. Caller is responsible for 1079 ownership and destruction of the copied references. 
See also 1080 @ref CeedOperatorLinearAssembleQFunction 1081 1082 @param op CeedOperator to assemble CeedQFunction 1083 @param assembled CeedVector to store assembled CeedQFunction at 1084 quadrature points 1085 @param rstr CeedElemRestriction for CeedVector containing assembled 1086 CeedQFunction 1087 @param request Address of CeedRequest for non-blocking completion, else 1088 @ref CEED_REQUEST_IMMEDIATE 1089 1090 @return An error code: 0 - success, otherwise - failure 1091 1092 @ref User 1093 **/ 1094 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, 1095 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1096 int ierr; 1097 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1098 1099 // Backend version 1100 if (op->LinearAssembleQFunctionUpdate) { 1101 if (op->has_qf_assembled) { 1102 ierr = op->LinearAssembleQFunctionUpdate(op, op->qf_assembled, 1103 op->qf_assembled_rstr, request); 1104 } else { 1105 ierr = op->LinearAssembleQFunction(op, &op->qf_assembled, 1106 &op->qf_assembled_rstr, request); 1107 } 1108 CeedChk(ierr); 1109 op->has_qf_assembled = true; 1110 // Copy reference to internally held copy 1111 *assembled = NULL; 1112 *rstr = NULL; 1113 ierr = CeedVectorReferenceCopy(op->qf_assembled, assembled); CeedChk(ierr); 1114 ierr = CeedElemRestrictionReferenceCopy(op->qf_assembled_rstr, rstr); 1115 } else { 1116 // Fallback to reference Ceed 1117 if (!op->op_fallback) { 1118 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1119 } 1120 // Assemble 1121 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback, 1122 assembled, rstr, request); CeedChk(ierr); 1123 } 1124 CeedChk(ierr); 1125 1126 return CEED_ERROR_SUCCESS; 1127 } 1128 1129 /** 1130 @brief Assemble the diagonal of a square linear CeedOperator 1131 1132 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Backend only provides the additive variant; zero the output first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    // (empty fallback resource or same resource means no fallback is possible)
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
    CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  // (third argument 'false' selects the scalar diagonal, not point block)
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
1267 @param request Address of CeedRequest for non-blocking completion, else 1268 CEED_REQUEST_IMMEDIATE 1269 1270 @return An error code: 0 - success, otherwise - failure 1271 1272 @ref User 1273 **/ 1274 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, 1275 CeedVector assembled, CeedRequest *request) { 1276 int ierr; 1277 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1278 1279 // Use backend version, if available 1280 if (op->LinearAssemblePointBlockDiagonal) { 1281 ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); 1282 CeedChk(ierr); 1283 return CEED_ERROR_SUCCESS; 1284 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1285 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1286 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, 1287 request); CeedChk(ierr); 1288 return CEED_ERROR_SUCCESS; 1289 } else { 1290 // Check for valid fallback resource 1291 const char *resource, *fallback_resource; 1292 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1293 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1294 CeedChk(ierr); 1295 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1296 // Fallback to reference Ceed 1297 if (!op->op_fallback) { 1298 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1299 } 1300 // Assemble 1301 ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback, 1302 assembled, request); CeedChk(ierr); 1303 return CEED_ERROR_SUCCESS; 1304 } 1305 } 1306 1307 // Default interface implementation 1308 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1309 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); 1310 CeedChk(ierr); 1311 return CEED_ERROR_SUCCESS; 1312 } 1313 1314 /** 1315 @brief Assemble the point block diagonal of a square linear CeedOperator 1316 1317 This sums into a CeedVector with the point block diagonal of a linear 1318 CeedOperator. 

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    // (empty fallback resource or same resource means no fallback is possible)
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  // (third argument 'true' selects point block diagonal assembly)
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op           CeedOperator to assemble
  @param[out] num_entries Number of entries in coordinate nonzero pattern
  @param[out] rows        Row number for each entry
  @param[out] cols        Column number for each entry

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  // (composite operators sum the entry counts over all sub-operators)
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations
  // (each sub-operator writes into its own offset range of rows/cols)
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op      CeedOperator to assemble
  @param[out] values Values to assemble into matrix

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // Each sub-operator writes its values at its own offset, matching the
  //   (rows, cols) ordering produced by CeedOperatorLinearAssembleSymbolic()
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level
transfer operators
           for a CeedOperator, creating the prolongation basis from the
           fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // For tensor bases the 1D node/quadrature counts are used; both bases must
  //   agree on tensor vs non-tensor representation
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Solve the least-squares problem interp_f interp_c_to_f = interp_c, so the
  //   coarse-to-fine map reproduces the coarse interpolation at quadrature
  //   points
  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c (back substitution on the
  //    upper-triangular factor stored in interp_f)
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
    CeedOperator *op_prolong, CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // Recover the 1D node count from the coarse element size via the dim-th
  //   root; assumes num_nodes_c is an exact dim-th power -- TODO confirm
  //   float truncation from sqrt/cbrt is safe here
  CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  P_1d_c = dim == 1 ? num_nodes_c :
           dim == 2 ? sqrt(num_nodes_c) :
           cbrt(num_nodes_c);
  // Gradient/reference/weight data are zero-filled placeholders; only the
  //   interpolation matrix of this basis is used for the transfer operators
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
                                 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a non-tensor basis for the active vector

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
                                       CeedVector p_mult_fine,
                                       CeedElemRestriction rstr_coarse,
                                       CeedBasis basis_coarse,
                                       const CeedScalar *interp_c_to_f,
                                       CeedOperator *op_coarse,
                                       CeedOperator *op_prolong,
                                       CeedOperator *op_restrict) {
  int ierr;
  // Asserts setup completeness and marks the operator immutable
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  // The fine and coarse operators must share a quadrature space so the same
  //   QFunction data applies on both levels.
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // Build an H1 basis whose "quadrature" points are the fine-grid nodes, so
  //   applying it interpolates coarse nodal values to fine nodal values.
  CeedElemTopology topo;
  ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
  CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  // q_ref/q_weight/grad are zero-filled placeholders (CeedCalloc); only
  //   interp_c_to_f (num_nodes_f x num_nodes_c) carries real data here.
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
                           interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  // The basis keeps its own copies, so the temporaries can be freed now
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build a FDM based approximate inverse for each element for a
           CeedOperator

  This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
    Method based approximate inverse. This function obtains the simultaneous
    diagonalization for the 1D mass and Laplacian operators,
    M = V^T V, K = V^T S V.
    The assembled QFunction is used to modify the eigenvalues from simultaneous
    diagonalization and obtain an approximate inverse of the form
    V^T S^hat V. The CeedOperator must be linear and non-composite. The
    associated CeedQFunction must therefore also be linear.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1821 1822 @param op CeedOperator to create element inverses 1823 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 1824 for each element 1825 @param request Address of CeedRequest for non-blocking completion, else 1826 @ref CEED_REQUEST_IMMEDIATE 1827 1828 @return An error code: 0 - success, otherwise - failure 1829 1830 @ref Backend 1831 **/ 1832 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 1833 CeedRequest *request) { 1834 int ierr; 1835 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1836 1837 // Use backend version, if available 1838 if (op->CreateFDMElementInverse) { 1839 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 1840 return CEED_ERROR_SUCCESS; 1841 } else { 1842 // Check for valid fallback resource 1843 const char *resource, *fallback_resource; 1844 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1845 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1846 CeedChk(ierr); 1847 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1848 // Fallback to reference Ceed 1849 if (!op->op_fallback) { 1850 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1851 } 1852 // Assemble 1853 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 1854 CeedChk(ierr); 1855 return CEED_ERROR_SUCCESS; 1856 } 1857 } 1858 1859 // Interface implementation 1860 Ceed ceed, ceed_parent; 1861 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1862 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 1863 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 1864 CeedQFunction qf; 1865 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 1866 1867 // Determine active input basis 1868 bool interp = false, grad = false; 1869 CeedBasis basis = NULL; 1870 CeedElemRestriction rstr = NULL; 1871 CeedOperatorField *op_fields; 1872 CeedQFunctionField *qf_fields; 1873 CeedInt num_input_fields; 1874 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 1875 CeedChk(ierr); 1876 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 1877 for (CeedInt i=0; i<num_input_fields; i++) { 1878 CeedVector vec; 1879 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 1880 if (vec == CEED_VECTOR_ACTIVE) { 1881 CeedEvalMode eval_mode; 1882 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 1883 interp = interp || eval_mode == CEED_EVAL_INTERP; 1884 grad = grad || eval_mode == CEED_EVAL_GRAD; 1885 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 1886 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 1887 } 1888 } 1889 if (!basis) 1890 // LCOV_EXCL_START 1891 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 1892 // LCOV_EXCL_STOP 1893 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1, 1894 l_size = 1; 1895 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 1896 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 1897 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 1898 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 1899 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 1900 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 1901 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 1902 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 1903 1904 // Build and diagonalize 1D Mass and Laplacian 1905 bool 
tensor_basis; 1906 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 1907 if (!tensor_basis) 1908 // LCOV_EXCL_START 1909 return CeedError(ceed, CEED_ERROR_BACKEND, 1910 "FDMElementInverse only supported for tensor " 1911 "bases"); 1912 // LCOV_EXCL_STOP 1913 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 1914 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 1915 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 1916 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 1917 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 1918 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 1919 // -- Build matrices 1920 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 1921 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 1922 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 1923 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 1924 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 1925 mass, laplace); CeedChk(ierr); 1926 1927 // -- Diagonalize 1928 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 1929 CeedChk(ierr); 1930 ierr = CeedFree(&mass); CeedChk(ierr); 1931 ierr = CeedFree(&laplace); CeedChk(ierr); 1932 for (CeedInt i=0; i<P_1d; i++) 1933 for (CeedInt j=0; j<P_1d; j++) 1934 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 1935 ierr = CeedFree(&x); CeedChk(ierr); 1936 1937 // Assemble QFunction 1938 CeedVector assembled; 1939 CeedElemRestriction rstr_qf; 1940 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 1941 &rstr_qf, request); CeedChk(ierr); 1942 CeedInt layout[3]; 1943 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 1944 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 1945 CeedScalar max_norm = 0; 1946 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 1947 1948 // Calculate element averages 1949 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 1950 CeedScalar 
*elem_avg; 1951 const CeedScalar *assembled_array, *q_weight_array; 1952 CeedVector q_weight; 1953 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 1954 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 1955 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 1956 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 1957 CeedChk(ierr); 1958 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 1959 CeedChk(ierr); 1960 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 1961 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 1962 for (CeedInt e=0; e<num_elem; e++) { 1963 CeedInt count = 0; 1964 for (CeedInt q=0; q<num_qpts; q++) 1965 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 1966 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 1967 qf_value_bound) { 1968 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 1969 q_weight_array[q]; 1970 count++; 1971 } 1972 if (count) { 1973 elem_avg[e] /= count; 1974 } else { 1975 elem_avg[e] = 1.0; 1976 } 1977 } 1978 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 1979 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 1980 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 1981 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 1982 1983 // Build FDM diagonal 1984 CeedVector q_data; 1985 CeedScalar *q_data_array, *fdm_diagonal; 1986 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 1987 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 1988 for (CeedInt c=0; c<num_comp; c++) 1989 for (CeedInt n=0; n<elem_size; n++) { 1990 if (interp) 1991 fdm_diagonal[c*elem_size + n] = 1.0; 1992 if (grad) 1993 for (CeedInt d=0; d<dim; d++) { 1994 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 1995 fdm_diagonal[c*elem_size + n] += lambda[i]; 1996 } 1997 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 1998 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 1999 } 2000 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2001 CeedChk(ierr); 2002 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2003 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2004 CeedChk(ierr); 2005 for (CeedInt e=0; e<num_elem; e++) 2006 for (CeedInt c=0; c<num_comp; c++) 2007 for (CeedInt n=0; n<elem_size; n++) 2008 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2009 fdm_diagonal[c*elem_size + n]); 2010 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2011 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2012 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2013 2014 // Setup FDM operator 2015 // -- Basis 2016 CeedBasis fdm_basis; 2017 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2018 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2019 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2020 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2021 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2022 fdm_interp, grad_dummy, q_ref_dummy, 2023 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2024 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2025 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2026 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2027 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2028 ierr = CeedFree(&lambda); CeedChk(ierr); 2029 2030 // -- Restriction 2031 CeedElemRestriction rstr_qd_i; 2032 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2033 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2034 num_comp, num_elem*num_comp*elem_size, 2035 strides, &rstr_qd_i); CeedChk(ierr); 2036 // -- QFunction 2037 CeedQFunction qf_fdm; 2038 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2039 CeedChk(ierr); 2040 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2041 
CeedChk(ierr); 2042 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2043 CeedChk(ierr); 2044 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2045 CeedChk(ierr); 2046 // -- QFunction context 2047 CeedInt *num_comp_data; 2048 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2049 num_comp_data[0] = num_comp; 2050 CeedQFunctionContext ctx_fdm; 2051 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2052 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2053 sizeof(*num_comp_data), num_comp_data); 2054 CeedChk(ierr); 2055 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2056 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2057 // -- Operator 2058 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2059 CeedChk(ierr); 2060 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2061 CeedChk(ierr); 2062 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2063 q_data); CeedChk(ierr); 2064 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2065 CeedChk(ierr); 2066 2067 // Cleanup 2068 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2069 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2070 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2071 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2072 2073 return CEED_ERROR_SUCCESS; 2074 } 2075 2076 /// @} 2077