// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 op->op_fallback = op_ref; 
79 80 // Clone QF 81 CeedQFunction qf_ref; 82 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 83 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 84 qf_ref->data = NULL; 85 qf_ref->ceed = ceed_ref; 86 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 87 op_ref->qf = qf_ref; 88 op->qf_fallback = qf_ref; 89 return CEED_ERROR_SUCCESS; 90 } 91 92 /** 93 @brief Select correct basis matrix pointer based on CeedEvalMode 94 95 @param[in] eval_mode Current basis evaluation mode 96 @param[in] identity Pointer to identity matrix 97 @param[in] interp Pointer to interpolation matrix 98 @param[in] grad Pointer to gradient matrix 99 @param[out] basis_ptr Basis pointer to set 100 101 @return none 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 
  // Scale each scalar offset so it addresses a num_comp x num_comp block;
  // track the largest resulting offset to size the new L-vector
  CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
          *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // NOTE(review): shift is num_comp when comp_stride == 1, num_comp^2
  // otherwise — presumably to account for interlaced vs strided component
  // layout; confirm against CeedElemRestriction offset semantics
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction with num_comp*num_comp components (one full
  // point block per node); ownership of pointblock_offsets transfers
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction — yields the pointwise D matrix entries of B^T D B
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf,
         &rstr, request); CeedChk(ierr);
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm is used below to build a drop tolerance for tiny QF values
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis — collect one eval mode entry per
  // field (dim entries for CEED_EVAL_GRAD, one per dimension)
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share a single restriction
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in);
        CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // Gradient contributes one eval mode entry per dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis — mirrors the input loop above
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array, *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices — build an identity stand-in for CEED_EVAL_NONE so the
  // same B^T D B loop handles all eval modes uniformly
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out);
  CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // QF values within 100*eps of the max norm of D are treated as zero
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // d_out advances through the dim gradient slices of grad_out
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal — full num_comp x num_comp block per node
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only — c_in == c_out
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                  num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array);
CeedChk(ierr); 390 391 // Assemble local operator diagonal 392 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 393 assembled, request); CeedChk(ierr); 394 395 // Cleanup 396 if (is_pointblock) { 397 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 398 } 399 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 400 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 401 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 402 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 403 ierr = CeedFree(&identity); CeedChk(ierr); 404 405 return CEED_ERROR_SUCCESS; 406 } 407 408 /** 409 @brief Core logic for assembling composite operator diagonal 410 411 @param[in] op CeedOperator to assemble point block diagonal 412 @param[in] request Address of CeedRequest for non-blocking completion, else 413 CEED_REQUEST_IMMEDIATE 414 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 415 @param[out] assembled CeedVector to store assembled diagonal 416 417 @return An error code: 0 - success, otherwise - failure 418 419 @ref Developer 420 **/ 421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 422 CeedOperator op, CeedRequest *request, const bool is_pointblock, 423 CeedVector assembled) { 424 int ierr; 425 CeedInt num_sub; 426 CeedOperator *suboperators; 427 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 428 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 429 for (CeedInt i = 0; i < num_sub; i++) { 430 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 431 is_pointblock, assembled); CeedChk(ierr); 432 } 433 return CEED_ERROR_SUCCESS; 434 } 435 436 /** 437 @brief Build nonzero pattern for non-composite operator 438 439 Users should generally use CeedOperatorLinearAssembleSymbolic() 440 441 @param[in] op CeedOperator to assemble nonzero pattern 442 @param[in] offset Offset for number of entries 443 @param[out] rows Row number for each entry 444 @param[out] cols 
Column number for each entry 445 446 @return An error code: 0 - success, otherwise - failure 447 448 @ref Developer 449 **/ 450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 451 CeedInt *rows, CeedInt *cols) { 452 int ierr; 453 Ceed ceed = op->ceed; 454 if (op->is_composite) 455 // LCOV_EXCL_START 456 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 457 "Composite operator not supported"); 458 // LCOV_EXCL_STOP 459 460 CeedElemRestriction rstr_in; 461 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 462 CeedInt num_elem, elem_size, num_nodes, num_comp; 463 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 464 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 465 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 466 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 467 CeedInt layout_er[3]; 468 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 469 470 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 471 472 // Determine elem_dof relation 473 CeedVector index_vec; 474 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 475 CeedScalar *array; 476 ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 477 for (CeedInt i = 0; i < num_nodes; ++i) { 478 array[i] = i; 479 } 480 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 481 CeedVector elem_dof; 482 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 483 CeedChk(ierr); 484 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 485 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 486 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 487 const CeedScalar *elem_dof_a; 488 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 489 CeedChk(ierr); 490 ierr = CeedVectorDestroy(&index_vec); 
CeedChk(ierr); 491 492 // Determine i, j locations for element matrices 493 CeedInt count = 0; 494 for (int e = 0; e < num_elem; ++e) { 495 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 496 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 497 for (int i = 0; i < elem_size; ++i) { 498 for (int j = 0; j < elem_size; ++j) { 499 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 500 (comp_out)*layout_er[1] + e*layout_er[2]; 501 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 502 (comp_in)*layout_er[1] + e*layout_er[2]; 503 504 const CeedInt row = elem_dof_a[elem_dof_index_row]; 505 const CeedInt col = elem_dof_a[elem_dof_index_col]; 506 507 rows[offset + count] = row; 508 cols[offset + count] = col; 509 count++; 510 } 511 } 512 } 513 } 514 } 515 if (count != local_num_entries) 516 // LCOV_EXCL_START 517 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 // LCOV_EXCL_STOP 519 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 520 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 521 522 return CEED_ERROR_SUCCESS; 523 } 524 525 /** 526 @brief Assemble nonzero entries for non-composite operator 527 528 Users should generally use CeedOperatorLinearAssemble() 529 530 @param[in] op CeedOperator to assemble 531 @param[out] offset Offest for number of entries 532 @param[out] values Values to assemble into matrix 533 534 @return An error code: 0 - success, otherwise - failure 535 536 @ref Developer 537 **/ 538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 539 CeedVector values) { 540 int ierr; 541 Ceed ceed = op->ceed; 542 if (op->is_composite) 543 // LCOV_EXCL_START 544 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 545 "Composite operator not supported"); 546 // LCOV_EXCL_STOP 547 548 // Assemble QFunction 549 CeedQFunction qf; 550 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 551 CeedVector assembled_qf; 552 CeedElemRestriction rstr_q; 553 ierr = 
CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 554 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 555 556 CeedInt qf_length; 557 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 558 559 CeedInt num_input_fields, num_output_fields; 560 CeedOperatorField *input_fields; 561 CeedOperatorField *output_fields; 562 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 563 &num_output_fields, &output_fields); CeedChk(ierr); 564 565 // Determine active input basis 566 CeedQFunctionField *qf_fields; 567 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 568 CeedInt num_eval_mode_in = 0, dim = 1; 569 CeedEvalMode *eval_mode_in = NULL; 570 CeedBasis basis_in = NULL; 571 CeedElemRestriction rstr_in = NULL; 572 for (CeedInt i=0; i<num_input_fields; i++) { 573 CeedVector vec; 574 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 575 if (vec == CEED_VECTOR_ACTIVE) { 576 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 577 CeedChk(ierr); 578 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 579 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 580 CeedChk(ierr); 581 CeedEvalMode eval_mode; 582 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 583 CeedChk(ierr); 584 switch (eval_mode) { 585 case CEED_EVAL_NONE: 586 case CEED_EVAL_INTERP: 587 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 588 eval_mode_in[num_eval_mode_in] = eval_mode; 589 num_eval_mode_in += 1; 590 break; 591 case CEED_EVAL_GRAD: 592 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 593 for (CeedInt d=0; d<dim; d++) { 594 eval_mode_in[num_eval_mode_in+d] = eval_mode; 595 } 596 num_eval_mode_in += dim; 597 break; 598 case CEED_EVAL_WEIGHT: 599 case CEED_EVAL_DIV: 600 case CEED_EVAL_CURL: 601 break; // Caught by QF Assembly 602 } 603 } 604 } 605 606 // Determine active output basis 607 ierr = 
CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 608 CeedInt num_eval_mode_out = 0; 609 CeedEvalMode *eval_mode_out = NULL; 610 CeedBasis basis_out = NULL; 611 CeedElemRestriction rstr_out = NULL; 612 for (CeedInt i=0; i<num_output_fields; i++) { 613 CeedVector vec; 614 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 615 if (vec == CEED_VECTOR_ACTIVE) { 616 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 617 CeedChk(ierr); 618 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 619 CeedChk(ierr); 620 CeedEvalMode eval_mode; 621 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 622 CeedChk(ierr); 623 switch (eval_mode) { 624 case CEED_EVAL_NONE: 625 case CEED_EVAL_INTERP: 626 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 627 eval_mode_out[num_eval_mode_out] = eval_mode; 628 num_eval_mode_out += 1; 629 break; 630 case CEED_EVAL_GRAD: 631 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 632 for (CeedInt d=0; d<dim; d++) { 633 eval_mode_out[num_eval_mode_out+d] = eval_mode; 634 } 635 num_eval_mode_out += dim; 636 break; 637 case CEED_EVAL_WEIGHT: 638 case CEED_EVAL_DIV: 639 case CEED_EVAL_CURL: 640 break; // Caught by QF Assembly 641 } 642 } 643 } 644 645 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 646 // LCOV_EXCL_START 647 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 648 "Cannot assemble operator with out inputs/outputs"); 649 // LCOV_EXCL_STOP 650 651 CeedInt num_elem, elem_size, num_qpts, num_comp; 652 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 653 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 654 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 655 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 656 657 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * 
num_elem; 658 659 // loop over elements and put in data structure 660 const CeedScalar *interp_in, *grad_in; 661 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 662 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 663 664 const CeedScalar *assembled_qf_array; 665 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 666 CeedChk(ierr); 667 668 CeedInt layout_qf[3]; 669 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 670 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 671 672 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 673 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 674 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 675 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 676 num_qpts]; // logically 3-tensor 677 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 678 CeedScalar elem_mat[elem_size * elem_size]; 679 int count = 0; 680 CeedScalar *vals; 681 ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 682 for (int e = 0; e < num_elem; ++e) { 683 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 684 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 685 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 686 B_mat_in[ell] = 0.0; 687 } 688 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 689 B_mat_out[ell] = 0.0; 690 } 691 // Store block-diagonal D matrix as collection of small dense blocks 692 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 693 D_mat[ell] = 0.0; 694 } 695 // form element matrix itself (for each block component) 696 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 697 elem_mat[ell] = 0.0; 698 } 699 for (int q = 0; q < num_qpts; ++q) { 700 for (int n = 0; n < elem_size; ++n) { 701 CeedInt d_in = -1; 702 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 703 const int qq = 
num_eval_mode_in*q; 704 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 705 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 706 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 707 d_in += 1; 708 B_mat_in[(qq+e_in)*elem_size + n] += 709 grad_in[(d_in*num_qpts+q) * elem_size + n]; 710 } else { 711 // LCOV_EXCL_START 712 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 713 // LCOV_EXCL_STOP 714 } 715 } 716 CeedInt d_out = -1; 717 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 718 const int qq = num_eval_mode_out*q; 719 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 720 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 721 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 722 d_out += 1; 723 B_mat_out[(qq+e_out)*elem_size + n] += 724 grad_in[(d_out*num_qpts+q) * elem_size + n]; 725 } else { 726 // LCOV_EXCL_START 727 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 728 // LCOV_EXCL_STOP 729 } 730 } 731 } 732 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 733 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 734 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 735 +comp_out; 736 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 737 e*layout_qf[2]; 738 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 739 } 740 } 741 } 742 // Compute B^T*D 743 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 744 BTD[ell] = 0.0; 745 } 746 for (int j = 0; j<elem_size; ++j) { 747 for (int q = 0; q<num_qpts; ++q) { 748 int qq = num_eval_mode_out*q; 749 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 750 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 751 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 752 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 753 } 754 } 755 } 756 } 757 758 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 759 elem_size, 
num_qpts*num_eval_mode_in); CeedChk(ierr); 760 761 // put element matrix in coordinate data structure 762 for (int i = 0; i < elem_size; ++i) { 763 for (int j = 0; j < elem_size; ++j) { 764 vals[offset + count] = elem_mat[i*elem_size + j]; 765 count++; 766 } 767 } 768 } 769 } 770 } 771 if (count != local_num_entries) 772 // LCOV_EXCL_START 773 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 774 // LCOV_EXCL_STOP 775 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 776 777 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 778 CeedChk(ierr); 779 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 780 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 781 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 782 783 return CEED_ERROR_SUCCESS; 784 } 785 786 /** 787 @brief Count number of entries for assembled CeedOperator 788 789 @param[in] op CeedOperator to assemble 790 @param[out] num_entries Number of entries in assembled representation 791 792 @return An error code: 0 - success, otherwise - failure 793 794 @ref Utility 795 **/ 796 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 797 CeedInt *num_entries) { 798 int ierr; 799 CeedElemRestriction rstr; 800 CeedInt num_elem, elem_size, num_comp; 801 802 if (op->is_composite) 803 // LCOV_EXCL_START 804 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 805 "Composite operator not supported"); 806 // LCOV_EXCL_STOP 807 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 808 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 809 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 810 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 811 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 812 813 return CEED_ERROR_SUCCESS; 814 } 815 816 /** 817 @brief Common code for creating a multigrid coarse operator and level 818 transfer operators for a CeedOperator 
819 820 @param[in] op_fine Fine grid operator 821 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 822 @param[in] rstr_coarse Coarse grid restriction 823 @param[in] basis_coarse Coarse grid active vector basis 824 @param[in] basis_c_to_f Basis for coarse to fine interpolation 825 @param[out] op_coarse Coarse grid operator 826 @param[out] op_prolong Coarse to fine operator 827 @param[out] op_restrict Fine to coarse operator 828 829 @return An error code: 0 - success, otherwise - failure 830 831 @ref Developer 832 **/ 833 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 834 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 835 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 836 CeedOperator *op_restrict) { 837 int ierr; 838 Ceed ceed; 839 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 840 841 // Check for composite operator 842 bool is_composite; 843 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 844 if (is_composite) 845 // LCOV_EXCL_START 846 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 847 "Automatic multigrid setup for composite operators not supported"); 848 // LCOV_EXCL_STOP 849 850 // Coarse Grid 851 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 852 op_coarse); CeedChk(ierr); 853 CeedElemRestriction rstr_fine = NULL; 854 // -- Clone input fields 855 for (int i = 0; i < op_fine->qf->num_input_fields; i++) { 856 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 857 rstr_fine = op_fine->input_fields[i]->elem_restr; 858 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 859 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 860 CeedChk(ierr); 861 } else { 862 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 863 op_fine->input_fields[i]->elem_restr, 864 op_fine->input_fields[i]->basis, 865 op_fine->input_fields[i]->vec); CeedChk(ierr); 866 } 
867 } 868 // -- Clone output fields 869 for (int i = 0; i < op_fine->qf->num_output_fields; i++) { 870 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 871 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 872 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 873 CeedChk(ierr); 874 } else { 875 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 876 op_fine->output_fields[i]->elem_restr, 877 op_fine->output_fields[i]->basis, 878 op_fine->output_fields[i]->vec); CeedChk(ierr); 879 } 880 } 881 882 // Multiplicity vector 883 CeedVector mult_vec, mult_e_vec; 884 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 885 CeedChk(ierr); 886 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 887 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 888 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 889 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 890 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 891 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 892 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 893 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 894 895 // Restriction 896 CeedInt num_comp; 897 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr); 898 CeedQFunction qf_restrict; 899 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 900 CeedChk(ierr); 901 CeedInt *num_comp_r_data; 902 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 903 num_comp_r_data[0] = num_comp; 904 CeedQFunctionContext ctx_r; 905 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 906 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 907 sizeof(*num_comp_r_data), num_comp_r_data); 908 CeedChk(ierr); 909 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 910 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 911 ierr = 
CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 912 CeedChk(ierr); 913 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 914 CeedChk(ierr); 915 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 916 CEED_EVAL_INTERP); CeedChk(ierr); 917 918 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 919 CEED_QFUNCTION_NONE, op_restrict); 920 CeedChk(ierr); 921 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 922 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 923 CeedChk(ierr); 924 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 925 CEED_BASIS_COLLOCATED, mult_vec); 926 CeedChk(ierr); 927 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 928 CEED_VECTOR_ACTIVE); CeedChk(ierr); 929 930 // Prolongation 931 CeedQFunction qf_prolong; 932 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 933 CeedChk(ierr); 934 CeedInt *num_comp_p_data; 935 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 936 num_comp_p_data[0] = num_comp; 937 CeedQFunctionContext ctx_p; 938 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 939 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 940 sizeof(*num_comp_p_data), num_comp_p_data); 941 CeedChk(ierr); 942 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 943 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 944 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 945 CeedChk(ierr); 946 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 947 CeedChk(ierr); 948 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 949 CeedChk(ierr); 950 951 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 952 CEED_QFUNCTION_NONE, op_prolong); 953 CeedChk(ierr); 954 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 955 CEED_VECTOR_ACTIVE); 
CeedChk(ierr); 956 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 957 CEED_BASIS_COLLOCATED, mult_vec); 958 CeedChk(ierr); 959 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 960 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 961 CeedChk(ierr); 962 963 // Cleanup 964 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 965 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 966 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 967 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 968 return CEED_ERROR_SUCCESS; 969 } 970 971 /** 972 @brief Build 1D mass matrix and Laplacian with perturbation 973 974 @param[in] interp_1d Interpolation matrix in one dimension 975 @param[in] grad_1d Gradient matrix in one dimension 976 @param[in] q_weight_1d Quadrature weights in one dimension 977 @param[in] P_1d Number of basis nodes in one dimension 978 @param[in] Q_1d Number of quadrature points in one dimension 979 @param[in] dim Dimension of basis 980 @param[out] mass Assembled mass matrix in one dimension 981 @param[out] laplace Assembled perturbed Laplacian in one dimension 982 983 @return An error code: 0 - success, otherwise - failure 984 985 @ref Developer 986 **/ 987 CeedPragmaOptimizeOff 988 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 989 const CeedScalar *grad_1d, 990 const CeedScalar *q_weight_1d, CeedInt P_1d, 991 CeedInt Q_1d, CeedInt dim, 992 CeedScalar *mass, CeedScalar *laplace) { 993 994 for (CeedInt i=0; i<P_1d; i++) 995 for (CeedInt j=0; j<P_1d; j++) { 996 CeedScalar sum = 0.0; 997 for (CeedInt k=0; k<Q_1d; k++) 998 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 999 mass[i+j*P_1d] = sum; 1000 } 1001 // -- Laplacian 1002 for (CeedInt i=0; i<P_1d; i++) 1003 for (CeedInt j=0; j<P_1d; j++) { 1004 CeedScalar sum = 0.0; 1005 for (CeedInt k=0; k<Q_1d; k++) 1006 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1007 laplace[i+j*P_1d] = sum; 1008 } 1009 CeedScalar perturbation = dim>2 ? 
1e-6 : 1e-4; 1010 for (CeedInt i=0; i<P_1d; i++) 1011 laplace[i+P_1d*i] += perturbation; 1012 return CEED_ERROR_SUCCESS; 1013 } 1014 CeedPragmaOptimizeOn 1015 1016 /// @} 1017 1018 /// ---------------------------------------------------------------------------- 1019 /// CeedOperator Public API 1020 /// ---------------------------------------------------------------------------- 1021 /// @addtogroup CeedOperatorUser 1022 /// @{ 1023 1024 /** 1025 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1026 1027 This returns a CeedVector containing a matrix at each quadrature point 1028 providing the action of the CeedQFunction associated with the CeedOperator. 1029 The vector 'assembled' is of shape 1030 [num_elements, num_input_fields, num_output_fields, num_quad_points] 1031 and contains column-major matrices representing the action of the 1032 CeedQFunction for a corresponding quadrature point on an element. Inputs and 1033 outputs are in the order provided by the user when adding CeedOperator fields. 1034 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1035 'v', provided in that order, would result in an assembled QFunction that 1036 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1037 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1038 1039 Note: Calling this function asserts that setup is complete 1040 and sets the CeedOperator as immutable. 
1041 1042 @param op CeedOperator to assemble CeedQFunction 1043 @param[out] assembled CeedVector to store assembled CeedQFunction at 1044 quadrature points 1045 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1046 CeedQFunction 1047 @param request Address of CeedRequest for non-blocking completion, else 1048 @ref CEED_REQUEST_IMMEDIATE 1049 1050 @return An error code: 0 - success, otherwise - failure 1051 1052 @ref User 1053 **/ 1054 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1055 CeedElemRestriction *rstr, 1056 CeedRequest *request) { 1057 int ierr; 1058 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1059 1060 // Backend version 1061 if (op->LinearAssembleQFunction) { 1062 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1063 CeedChk(ierr); 1064 } else { 1065 // Fallback to reference Ceed 1066 if (!op->op_fallback) { 1067 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1068 } 1069 // Assemble 1070 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1071 rstr, request); CeedChk(ierr); 1072 } 1073 return CEED_ERROR_SUCCESS; 1074 } 1075 1076 /** 1077 @brief Assemble CeedQFunction and store result internall. Return copied 1078 references of stored data to the caller. Caller is responsible for 1079 ownership and destruction of the copied references. 
See also 1080 @ref CeedOperatorLinearAssembleQFunction 1081 1082 @param op CeedOperator to assemble CeedQFunction 1083 @param assembled CeedVector to store assembled CeedQFunction at 1084 quadrature points 1085 @param rstr CeedElemRestriction for CeedVector containing assembled 1086 CeedQFunction 1087 @param request Address of CeedRequest for non-blocking completion, else 1088 @ref CEED_REQUEST_IMMEDIATE 1089 1090 @return An error code: 0 - success, otherwise - failure 1091 1092 @ref User 1093 **/ 1094 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, 1095 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1096 int ierr; 1097 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1098 1099 // Backend version 1100 if (op->LinearAssembleQFunctionUpdate) { 1101 if (op->has_qf_assembled) { 1102 ierr = op->LinearAssembleQFunctionUpdate(op, op->qf_assembled, 1103 op->qf_assembled_rstr, request); 1104 } else { 1105 ierr = op->LinearAssembleQFunction(op, &op->qf_assembled, 1106 &op->qf_assembled_rstr, request); 1107 } 1108 CeedChk(ierr); 1109 op->has_qf_assembled = true; 1110 // Copy reference to internally held copy 1111 *assembled = NULL; 1112 *rstr = NULL; 1113 ierr = CeedVectorReferenceCopy(op->qf_assembled, assembled); CeedChk(ierr); 1114 ierr = CeedElemRestrictionReferenceCopy(op->qf_assembled_rstr, rstr); 1115 } else { 1116 // Fallback to reference Ceed 1117 if (!op->op_fallback) { 1118 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1119 } 1120 // Assemble 1121 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback, 1122 assembled, rstr, request); CeedChk(ierr); 1123 } 1124 CeedChk(ierr); 1125 1126 return CEED_ERROR_SUCCESS; 1127 } 1128 1129 /** 1130 @brief Assemble the diagonal of a square linear CeedOperator 1131 1132 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 
1133 1134 Note: Currently only non-composite CeedOperators with a single field and 1135 composite CeedOperators with single field sub-operators are supported. 1136 1137 Note: Calling this function asserts that setup is complete 1138 and sets the CeedOperator as immutable. 1139 1140 @param op CeedOperator to assemble CeedQFunction 1141 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1142 @param request Address of CeedRequest for non-blocking completion, else 1143 @ref CEED_REQUEST_IMMEDIATE 1144 1145 @return An error code: 0 - success, otherwise - failure 1146 1147 @ref User 1148 **/ 1149 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, 1150 CeedRequest *request) { 1151 int ierr; 1152 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1153 1154 // Use backend version, if available 1155 if (op->LinearAssembleDiagonal) { 1156 ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); 1157 return CEED_ERROR_SUCCESS; 1158 } else if (op->LinearAssembleAddDiagonal) { 1159 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1160 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1161 return CEED_ERROR_SUCCESS; 1162 } else { 1163 // Check for valid fallback resource 1164 const char *resource, *fallback_resource; 1165 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1166 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1167 CeedChk(ierr); 1168 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1169 // Fallback to reference Ceed 1170 if (!op->op_fallback) { 1171 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1172 } 1173 // Assemble 1174 ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request); 1175 CeedChk(ierr); 1176 return CEED_ERROR_SUCCESS; 1177 } 1178 } 1179 1180 // Default interface implementation 1181 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1182 ierr = 
CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); 1183 CeedChk(ierr); 1184 return CEED_ERROR_SUCCESS; 1185 } 1186 1187 /** 1188 @brief Assemble the diagonal of a square linear CeedOperator 1189 1190 This sums into a CeedVector the diagonal of a linear CeedOperator. 1191 1192 Note: Currently only non-composite CeedOperators with a single field and 1193 composite CeedOperators with single field sub-operators are supported. 1194 1195 Note: Calling this function asserts that setup is complete 1196 and sets the CeedOperator as immutable. 1197 1198 @param op CeedOperator to assemble CeedQFunction 1199 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1200 @param request Address of CeedRequest for non-blocking completion, else 1201 @ref CEED_REQUEST_IMMEDIATE 1202 1203 @return An error code: 0 - success, otherwise - failure 1204 1205 @ref User 1206 **/ 1207 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1208 CeedRequest *request) { 1209 int ierr; 1210 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1211 1212 // Use backend version, if available 1213 if (op->LinearAssembleAddDiagonal) { 1214 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1215 return CEED_ERROR_SUCCESS; 1216 } else { 1217 // Check for valid fallback resource 1218 const char *resource, *fallback_resource; 1219 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1220 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1221 CeedChk(ierr); 1222 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1223 // Fallback to reference Ceed 1224 if (!op->op_fallback) { 1225 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1226 } 1227 // Assemble 1228 ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, 1229 request); CeedChk(ierr); 1230 return CEED_ERROR_SUCCESS; 1231 } 1232 } 1233 1234 // Default interface implementation 1235 bool is_composite; 
1236 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1237 if (is_composite) { 1238 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1239 false, assembled); CeedChk(ierr); 1240 return CEED_ERROR_SUCCESS; 1241 } else { 1242 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1243 CeedChk(ierr); 1244 return CEED_ERROR_SUCCESS; 1245 } 1246 } 1247 1248 /** 1249 @brief Assemble the point block diagonal of a square linear CeedOperator 1250 1251 This overwrites a CeedVector with the point block diagonal of a linear 1252 CeedOperator. 1253 1254 Note: Currently only non-composite CeedOperators with a single field and 1255 composite CeedOperators with single field sub-operators are supported. 1256 1257 Note: Calling this function asserts that setup is complete 1258 and sets the CeedOperator as immutable. 1259 1260 @param op CeedOperator to assemble CeedQFunction 1261 @param[out] assembled CeedVector to store assembled CeedOperator point block 1262 diagonal, provided in row-major form with an 1263 @a num_comp * @a num_comp block at each node. The dimensions 1264 of this vector are derived from the active vector 1265 for the CeedOperator. The array has shape 1266 [nodes, component out, component in]. 
1267 @param request Address of CeedRequest for non-blocking completion, else 1268 CEED_REQUEST_IMMEDIATE 1269 1270 @return An error code: 0 - success, otherwise - failure 1271 1272 @ref User 1273 **/ 1274 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, 1275 CeedVector assembled, CeedRequest *request) { 1276 int ierr; 1277 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1278 1279 // Use backend version, if available 1280 if (op->LinearAssemblePointBlockDiagonal) { 1281 ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); 1282 CeedChk(ierr); 1283 return CEED_ERROR_SUCCESS; 1284 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1285 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1286 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, 1287 request); CeedChk(ierr); 1288 return CEED_ERROR_SUCCESS; 1289 } else { 1290 // Check for valid fallback resource 1291 const char *resource, *fallback_resource; 1292 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1293 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1294 CeedChk(ierr); 1295 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1296 // Fallback to reference Ceed 1297 if (!op->op_fallback) { 1298 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1299 } 1300 // Assemble 1301 ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback, 1302 assembled, request); CeedChk(ierr); 1303 return CEED_ERROR_SUCCESS; 1304 } 1305 } 1306 1307 // Default interface implementation 1308 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1309 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); 1310 CeedChk(ierr); 1311 return CEED_ERROR_SUCCESS; 1312 } 1313 1314 /** 1315 @brief Assemble the point block diagonal of a square linear CeedOperator 1316 1317 This sums into a CeedVector with the point block diagonal of a linear 1318 CeedOperator. 
1319 1320 Note: Currently only non-composite CeedOperators with a single field and 1321 composite CeedOperators with single field sub-operators are supported. 1322 1323 Note: Calling this function asserts that setup is complete 1324 and sets the CeedOperator as immutable. 1325 1326 @param op CeedOperator to assemble CeedQFunction 1327 @param[out] assembled CeedVector to store assembled CeedOperator point block 1328 diagonal, provided in row-major form with an 1329 @a num_comp * @a num_comp block at each node. The dimensions 1330 of this vector are derived from the active vector 1331 for the CeedOperator. The array has shape 1332 [nodes, component out, component in]. 1333 @param request Address of CeedRequest for non-blocking completion, else 1334 CEED_REQUEST_IMMEDIATE 1335 1336 @return An error code: 0 - success, otherwise - failure 1337 1338 @ref User 1339 **/ 1340 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, 1341 CeedVector assembled, CeedRequest *request) { 1342 int ierr; 1343 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1344 1345 // Use backend version, if available 1346 if (op->LinearAssembleAddPointBlockDiagonal) { 1347 ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); 1348 CeedChk(ierr); 1349 return CEED_ERROR_SUCCESS; 1350 } else { 1351 // Check for valid fallback resource 1352 const char *resource, *fallback_resource; 1353 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1354 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1355 CeedChk(ierr); 1356 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1357 // Fallback to reference Ceed 1358 if (!op->op_fallback) { 1359 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1360 } 1361 // Assemble 1362 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1363 assembled, request); CeedChk(ierr); 1364 return CEED_ERROR_SUCCESS; 1365 } 1366 } 1367 1368 // Default interface implemenation 
1369 bool is_composite; 1370 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1371 if (is_composite) { 1372 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1373 true, assembled); CeedChk(ierr); 1374 return CEED_ERROR_SUCCESS; 1375 } else { 1376 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1377 CeedChk(ierr); 1378 return CEED_ERROR_SUCCESS; 1379 } 1380 } 1381 1382 /** 1383 @brief Fully assemble the nonzero pattern of a linear operator. 1384 1385 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1386 1387 The assembly routines use coordinate format, with num_entries tuples of the 1388 form (i, j, value) which indicate that value should be added to the matrix 1389 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1390 This function returns the number of entries and their (i, j) locations, 1391 while CeedOperatorLinearAssemble() provides the values in the same 1392 ordering. 1393 1394 This will generally be slow unless your operator is low-order. 1395 1396 Note: Calling this function asserts that setup is complete 1397 and sets the CeedOperator as immutable. 
1398 1399 @param[in] op CeedOperator to assemble 1400 @param[out] num_entries Number of entries in coordinate nonzero pattern 1401 @param[out] rows Row number for each entry 1402 @param[out] cols Column number for each entry 1403 1404 @ref User 1405 **/ 1406 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries, 1407 CeedInt **rows, CeedInt **cols) { 1408 int ierr; 1409 CeedInt num_suboperators, single_entries; 1410 CeedOperator *sub_operators; 1411 bool is_composite; 1412 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1413 1414 // Use backend version, if available 1415 if (op->LinearAssembleSymbolic) { 1416 ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr); 1417 return CEED_ERROR_SUCCESS; 1418 } else { 1419 // Check for valid fallback resource 1420 const char *resource, *fallback_resource; 1421 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1422 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1423 CeedChk(ierr); 1424 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1425 // Fallback to reference Ceed 1426 if (!op->op_fallback) { 1427 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1428 } 1429 // Assemble 1430 ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows, 1431 cols); CeedChk(ierr); 1432 return CEED_ERROR_SUCCESS; 1433 } 1434 } 1435 1436 // Default interface implementation 1437 1438 // count entries and allocate rows, cols arrays 1439 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1440 *num_entries = 0; 1441 if (is_composite) { 1442 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1443 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1444 for (int k = 0; k < num_suboperators; ++k) { 1445 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1446 &single_entries); CeedChk(ierr); 1447 *num_entries += single_entries; 1448 } 1449 } else { 1450 ierr = 
CeedSingleOperatorAssemblyCountEntries(op, 1451 &single_entries); CeedChk(ierr); 1452 *num_entries += single_entries; 1453 } 1454 ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr); 1455 ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr); 1456 1457 // assemble nonzero locations 1458 CeedInt offset = 0; 1459 if (is_composite) { 1460 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1461 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1462 for (int k = 0; k < num_suboperators; ++k) { 1463 ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, 1464 *cols); CeedChk(ierr); 1465 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1466 &single_entries); 1467 CeedChk(ierr); 1468 offset += single_entries; 1469 } 1470 } else { 1471 ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols); 1472 CeedChk(ierr); 1473 } 1474 1475 return CEED_ERROR_SUCCESS; 1476 } 1477 1478 /** 1479 @brief Fully assemble the nonzero entries of a linear operator. 1480 1481 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic() 1482 1483 The assembly routines use coordinate format, with num_entries tuples of the 1484 form (i, j, value) which indicate that value should be added to the matrix 1485 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1486 This function returns the values of the nonzero entries to be added, their 1487 (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1488 1489 This will generally be slow unless your operator is low-order. 1490 1491 Note: Calling this function asserts that setup is complete 1492 and sets the CeedOperator as immutable. 
1493 1494 @param[in] op CeedOperator to assemble 1495 @param[out] values Values to assemble into matrix 1496 1497 @ref User 1498 **/ 1499 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1500 int ierr; 1501 CeedInt num_suboperators, single_entries = 0; 1502 CeedOperator *sub_operators; 1503 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1504 1505 // Use backend version, if available 1506 if (op->LinearAssemble) { 1507 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1508 return CEED_ERROR_SUCCESS; 1509 } else { 1510 // Check for valid fallback resource 1511 const char *resource, *fallback_resource; 1512 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1513 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1514 CeedChk(ierr); 1515 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1516 // Fallback to reference Ceed 1517 if (!op->op_fallback) { 1518 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1519 } 1520 // Assemble 1521 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1522 return CEED_ERROR_SUCCESS; 1523 } 1524 } 1525 1526 // Default interface implementation 1527 bool is_composite; 1528 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1529 1530 CeedInt offset = 0; 1531 if (is_composite) { 1532 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1533 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1534 for (int k = 0; k < num_suboperators; ++k) { 1535 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1536 CeedChk(ierr); 1537 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1538 &single_entries); 1539 CeedChk(ierr); 1540 offset += single_entries; 1541 } 1542 } else { 1543 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1544 } 1545 1546 return CEED_ERROR_SUCCESS; 1547 } 1548 1549 /** 1550 @brief Create a multigrid coarse operator and level 
transfer operators 1551 for a CeedOperator, creating the prolongation basis from the 1552 fine and coarse grid interpolation 1553 1554 Note: Calling this function asserts that setup is complete 1555 and sets the CeedOperator as immutable. 1556 1557 @param[in] op_fine Fine grid operator 1558 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1559 @param[in] rstr_coarse Coarse grid restriction 1560 @param[in] basis_coarse Coarse grid active vector basis 1561 @param[out] op_coarse Coarse grid operator 1562 @param[out] op_prolong Coarse to fine operator 1563 @param[out] op_restrict Fine to coarse operator 1564 1565 @return An error code: 0 - success, otherwise - failure 1566 1567 @ref User 1568 **/ 1569 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1570 CeedVector p_mult_fine, 1571 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1572 CeedOperator *op_coarse, CeedOperator *op_prolong, 1573 CeedOperator *op_restrict) { 1574 int ierr; 1575 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1576 Ceed ceed; 1577 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1578 1579 // Check for compatible quadrature spaces 1580 CeedBasis basis_fine; 1581 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1582 CeedInt Q_f, Q_c; 1583 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1584 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1585 if (Q_f != Q_c) 1586 // LCOV_EXCL_START 1587 return CeedError(ceed, CEED_ERROR_DIMENSION, 1588 "Bases must have compatible quadrature spaces"); 1589 // LCOV_EXCL_STOP 1590 1591 // Coarse to fine basis 1592 CeedInt P_f, P_c, Q = Q_f; 1593 bool is_tensor_f, is_tensor_c; 1594 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1595 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1596 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1597 if (is_tensor_f && is_tensor_c) { 1598 ierr = 
CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1599 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1600 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1601 } else if (!is_tensor_f && !is_tensor_c) { 1602 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1603 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1604 } else { 1605 // LCOV_EXCL_START 1606 return CeedError(ceed, CEED_ERROR_MINOR, 1607 "Bases must both be tensor or non-tensor"); 1608 // LCOV_EXCL_STOP 1609 } 1610 1611 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1612 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1613 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1614 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1615 if (is_tensor_f) { 1616 memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]); 1617 memcpy(interp_c, basis_coarse->interp_1d, 1618 Q*P_c*sizeof basis_coarse->interp_1d[0]); 1619 } else { 1620 memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]); 1621 memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]); 1622 } 1623 1624 // -- QR Factorization, interp_f = Q R 1625 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1626 1627 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1628 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1629 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1630 1631 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1632 for (CeedInt j=0; j<P_c; j++) { // Column j 1633 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1634 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1635 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1636 for (CeedInt k=i+1; k<P_f; k++) 1637 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1638 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1639 } 1640 } 1641 ierr = CeedFree(&tau); CeedChk(ierr); 1642 ierr = 
CeedFree(&interp_c); CeedChk(ierr); 1643 ierr = CeedFree(&interp_f); CeedChk(ierr); 1644 1645 // Complete with interp_c_to_f versions of code 1646 if (is_tensor_f) { 1647 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1648 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1649 CeedChk(ierr); 1650 } else { 1651 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1652 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1653 CeedChk(ierr); 1654 } 1655 1656 // Cleanup 1657 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1658 return CEED_ERROR_SUCCESS; 1659 } 1660 1661 /** 1662 @brief Create a multigrid coarse operator and level transfer operators 1663 for a CeedOperator with a tensor basis for the active basis 1664 1665 Note: Calling this function asserts that setup is complete 1666 and sets the CeedOperator as immutable. 1667 1668 @param[in] op_fine Fine grid operator 1669 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1670 @param[in] rstr_coarse Coarse grid restriction 1671 @param[in] basis_coarse Coarse grid active vector basis 1672 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1673 @param[out] op_coarse Coarse grid operator 1674 @param[out] op_prolong Coarse to fine operator 1675 @param[out] op_restrict Fine to coarse operator 1676 1677 @return An error code: 0 - success, otherwise - failure 1678 1679 @ref User 1680 **/ 1681 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1682 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1683 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1684 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1685 int ierr; 1686 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1687 Ceed ceed; 1688 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1689 1690 // Check for compatible quadrature spaces 1691 CeedBasis basis_fine; 1692 
ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1693 CeedInt Q_f, Q_c; 1694 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1695 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1696 if (Q_f != Q_c) 1697 // LCOV_EXCL_START 1698 return CeedError(ceed, CEED_ERROR_DIMENSION, 1699 "Bases must have compatible quadrature spaces"); 1700 // LCOV_EXCL_STOP 1701 1702 // Coarse to fine basis 1703 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1704 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1705 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1706 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1707 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1708 CeedChk(ierr); 1709 P_1d_c = dim == 1 ? num_nodes_c : 1710 dim == 2 ? sqrt(num_nodes_c) : 1711 cbrt(num_nodes_c); 1712 CeedScalar *q_ref, *q_weight, *grad; 1713 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1714 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1715 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1716 CeedBasis basis_c_to_f; 1717 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1718 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1719 CeedChk(ierr); 1720 ierr = CeedFree(&q_ref); CeedChk(ierr); 1721 ierr = CeedFree(&q_weight); CeedChk(ierr); 1722 ierr = CeedFree(&grad); CeedChk(ierr); 1723 1724 // Core code 1725 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1726 basis_coarse, basis_c_to_f, op_coarse, 1727 op_prolong, op_restrict); 1728 CeedChk(ierr); 1729 return CEED_ERROR_SUCCESS; 1730 } 1731 1732 /** 1733 @brief Create a multigrid coarse operator and level transfer operators 1734 for a CeedOperator with a non-tensor basis for the active vector 1735 1736 Note: Calling this function asserts that setup is complete 1737 and sets the CeedOperator as immutable. 
1738 1739 @param[in] op_fine Fine grid operator 1740 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1741 @param[in] rstr_coarse Coarse grid restriction 1742 @param[in] basis_coarse Coarse grid active vector basis 1743 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1744 @param[out] op_coarse Coarse grid operator 1745 @param[out] op_prolong Coarse to fine operator 1746 @param[out] op_restrict Fine to coarse operator 1747 1748 @return An error code: 0 - success, otherwise - failure 1749 1750 @ref User 1751 **/ 1752 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1753 CeedVector p_mult_fine, 1754 CeedElemRestriction rstr_coarse, 1755 CeedBasis basis_coarse, 1756 const CeedScalar *interp_c_to_f, 1757 CeedOperator *op_coarse, 1758 CeedOperator *op_prolong, 1759 CeedOperator *op_restrict) { 1760 int ierr; 1761 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1762 Ceed ceed; 1763 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1764 1765 // Check for compatible quadrature spaces 1766 CeedBasis basis_fine; 1767 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1768 CeedInt Q_f, Q_c; 1769 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1770 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1771 if (Q_f != Q_c) 1772 // LCOV_EXCL_START 1773 return CeedError(ceed, CEED_ERROR_DIMENSION, 1774 "Bases must have compatible quadrature spaces"); 1775 // LCOV_EXCL_STOP 1776 1777 // Coarse to fine basis 1778 CeedElemTopology topo; 1779 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1780 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1781 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1782 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1783 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 1784 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1785 
CeedChk(ierr); 1786 CeedScalar *q_ref, *q_weight, *grad; 1787 ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr); 1788 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 1789 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 1790 CeedBasis basis_c_to_f; 1791 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 1792 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1793 CeedChk(ierr); 1794 ierr = CeedFree(&q_ref); CeedChk(ierr); 1795 ierr = CeedFree(&q_weight); CeedChk(ierr); 1796 ierr = CeedFree(&grad); CeedChk(ierr); 1797 1798 // Core code 1799 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1800 basis_coarse, basis_c_to_f, op_coarse, 1801 op_prolong, op_restrict); 1802 CeedChk(ierr); 1803 return CEED_ERROR_SUCCESS; 1804 } 1805 1806 /** 1807 @brief Build a FDM based approximate inverse for each element for a 1808 CeedOperator 1809 1810 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 1811 Method based approximate inverse. This function obtains the simultaneous 1812 diagonalization for the 1D mass and Laplacian operators, 1813 M = V^T V, K = V^T S V. 1814 The assembled QFunction is used to modify the eigenvalues from simultaneous 1815 diagonalization and obtain an approximate inverse of the form 1816 V^T S^hat V. The CeedOperator must be linear and non-composite. The 1817 associated CeedQFunction must therefore also be linear. 1818 1819 Note: Calling this function asserts that setup is complete 1820 and sets the CeedOperator as immutable. 
1821 1822 @param op CeedOperator to create element inverses 1823 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 1824 for each element 1825 @param request Address of CeedRequest for non-blocking completion, else 1826 @ref CEED_REQUEST_IMMEDIATE 1827 1828 @return An error code: 0 - success, otherwise - failure 1829 1830 @ref Backend 1831 **/ 1832 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 1833 CeedRequest *request) { 1834 int ierr; 1835 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1836 1837 // Use backend version, if available 1838 if (op->CreateFDMElementInverse) { 1839 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 1840 return CEED_ERROR_SUCCESS; 1841 } else { 1842 // Check for valid fallback resource 1843 const char *resource, *fallback_resource; 1844 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1845 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1846 CeedChk(ierr); 1847 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1848 // Fallback to reference Ceed 1849 if (!op->op_fallback) { 1850 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1851 } 1852 // Assemble 1853 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 1854 CeedChk(ierr); 1855 return CEED_ERROR_SUCCESS; 1856 } 1857 } 1858 1859 // Interface implementation 1860 Ceed ceed, ceed_parent; 1861 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1862 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 1863 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 1864 CeedQFunction qf; 1865 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 1866 1867 // Determine active input basis 1868 bool interp = false, grad = false; 1869 CeedBasis basis = NULL; 1870 CeedElemRestriction rstr = NULL; 1871 CeedOperatorField *op_fields; 1872 CeedQFunctionField *qf_fields; 1873 CeedInt num_input_fields; 1874 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 1875 CeedChk(ierr); 1876 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 1877 for (CeedInt i=0; i<num_input_fields; i++) { 1878 CeedVector vec; 1879 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 1880 if (vec == CEED_VECTOR_ACTIVE) { 1881 CeedEvalMode eval_mode; 1882 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 1883 interp = interp || eval_mode == CEED_EVAL_INTERP; 1884 grad = grad || eval_mode == CEED_EVAL_GRAD; 1885 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 1886 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 1887 } 1888 } 1889 if (!basis) 1890 // LCOV_EXCL_START 1891 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 1892 // LCOV_EXCL_STOP 1893 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1, 1894 l_size = 1; 1895 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 1896 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 1897 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 1898 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 1899 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 1900 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 1901 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 1902 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 1903 1904 // Build and diagonalize 1D Mass and Laplacian 1905 bool 
tensor_basis; 1906 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 1907 if (!tensor_basis) 1908 // LCOV_EXCL_START 1909 return CeedError(ceed, CEED_ERROR_BACKEND, 1910 "FDMElementInverse only supported for tensor " 1911 "bases"); 1912 // LCOV_EXCL_STOP 1913 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 1914 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 1915 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 1916 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 1917 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 1918 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 1919 // -- Build matrices 1920 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 1921 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 1922 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 1923 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 1924 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 1925 mass, laplace); CeedChk(ierr); 1926 1927 // -- Diagonalize 1928 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 1929 CeedChk(ierr); 1930 ierr = CeedFree(&mass); CeedChk(ierr); 1931 ierr = CeedFree(&laplace); CeedChk(ierr); 1932 for (CeedInt i=0; i<P_1d; i++) 1933 for (CeedInt j=0; j<P_1d; j++) 1934 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 1935 ierr = CeedFree(&x); CeedChk(ierr); 1936 1937 // Assemble QFunction 1938 CeedVector assembled; 1939 CeedElemRestriction rstr_qf; 1940 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 1941 &rstr_qf, request); CeedChk(ierr); 1942 CeedInt layout[3]; 1943 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 1944 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 1945 CeedScalar max_norm = 0; 1946 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 1947 1948 // Calculate element averages 1949 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 1950 CeedScalar 
*elem_avg; 1951 const CeedScalar *assembled_array, *q_weight_array; 1952 CeedVector q_weight; 1953 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 1954 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 1955 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 1956 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 1957 CeedChk(ierr); 1958 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 1959 CeedChk(ierr); 1960 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 1961 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 1962 for (CeedInt e=0; e<num_elem; e++) { 1963 CeedInt count = 0; 1964 for (CeedInt q=0; q<num_qpts; q++) 1965 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 1966 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 1967 qf_value_bound) { 1968 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 1969 q_weight_array[q]; 1970 count++; 1971 } 1972 if (count) { 1973 elem_avg[e] /= count; 1974 } else { 1975 elem_avg[e] = 1.0; 1976 } 1977 } 1978 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 1979 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 1980 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 1981 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 1982 1983 // Build FDM diagonal 1984 CeedVector q_data; 1985 CeedScalar *q_data_array, *fdm_diagonal; 1986 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 1987 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 1988 for (CeedInt c=0; c<num_comp; c++) 1989 for (CeedInt n=0; n<elem_size; n++) { 1990 if (interp) 1991 fdm_diagonal[c*elem_size + n] = 1.0; 1992 if (grad) 1993 for (CeedInt d=0; d<dim; d++) { 1994 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 1995 fdm_diagonal[c*elem_size + n] += lambda[i]; 1996 } 1997 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 1998 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 1999 } 2000 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2001 CeedChk(ierr); 2002 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2003 ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr); 2004 for (CeedInt e=0; e<num_elem; e++) 2005 for (CeedInt c=0; c<num_comp; c++) 2006 for (CeedInt n=0; n<elem_size; n++) 2007 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2008 fdm_diagonal[c*elem_size + n]); 2009 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2010 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2011 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2012 2013 // Setup FDM operator 2014 // -- Basis 2015 CeedBasis fdm_basis; 2016 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2017 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2018 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2019 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2020 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2021 fdm_interp, grad_dummy, q_ref_dummy, 2022 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2023 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2024 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2025 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2026 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2027 ierr = CeedFree(&lambda); CeedChk(ierr); 2028 2029 // -- Restriction 2030 CeedElemRestriction rstr_qd_i; 2031 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2032 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2033 num_comp, num_elem*num_comp*elem_size, 2034 strides, &rstr_qd_i); CeedChk(ierr); 2035 // -- QFunction 2036 CeedQFunction qf_fdm; 2037 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2038 CeedChk(ierr); 2039 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2040 
CeedChk(ierr); 2041 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2042 CeedChk(ierr); 2043 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2044 CeedChk(ierr); 2045 // -- QFunction context 2046 CeedInt *num_comp_data; 2047 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2048 num_comp_data[0] = num_comp; 2049 CeedQFunctionContext ctx_fdm; 2050 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2051 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2052 sizeof(*num_comp_data), num_comp_data); 2053 CeedChk(ierr); 2054 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2055 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2056 // -- Operator 2057 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2058 CeedChk(ierr); 2059 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2060 CeedChk(ierr); 2061 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2062 q_data); CeedChk(ierr); 2063 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2064 CeedChk(ierr); 2065 2066 // Cleanup 2067 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2068 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2069 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2070 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2071 2072 return CEED_ERROR_SUCCESS; 2073 } 2074 2075 /// @} 2076