// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 ierr = 
CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 79 &op_ref->qf_assembled); CeedChk(ierr); 80 op->op_fallback = op_ref; 81 82 // Clone QF 83 CeedQFunction qf_ref; 84 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 85 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 86 qf_ref->data = NULL; 87 qf_ref->ceed = ceed_ref; 88 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 89 op_ref->qf = qf_ref; 90 op->qf_fallback = qf_ref; 91 return CEED_ERROR_SUCCESS; 92 } 93 94 /** 95 @brief Select correct basis matrix pointer based on CeedEvalMode 96 97 @param[in] eval_mode Current basis evaluation mode 98 @param[in] identity Pointer to identity matrix 99 @param[in] interp Pointer to interpolation matrix 100 @param[in] grad Pointer to gradient matrix 101 @param[out] basis_ptr Basis pointer to set 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = 
CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 148 *pointblock_offsets; 149 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 151 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 152 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 153 CeedInt shift = num_comp; 154 if (comp_stride != 1) 155 shift *= num_comp; 156 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 157 CeedChk(ierr); 158 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 159 pointblock_offsets[i] = offsets[i]*shift; 160 if (pointblock_offsets[i] > max) 161 max = pointblock_offsets[i]; 162 } 163 164 // Create new restriction 165 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 166 1, max + num_comp*num_comp, CEED_MEM_HOST, 167 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 168 CeedChk(ierr); 169 170 // Cleanup 171 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 172 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else 181 CEED_REQUEST_IMMEDIATE 182 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 183 @param[out] assembled CeedVector to store assembled diagonal 184 185 @return An error code: 0 - success, otherwise - failure 186 187 @ref Developer 188 **/ 189 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 190 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 191 int ierr; 192 Ceed ceed; 193 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 194 195 // 
Assemble QFunction 196 CeedQFunction qf; 197 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 198 CeedInt num_input_fields, num_output_fields; 199 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 200 CeedChk(ierr); 201 CeedVector assembled_qf; 202 CeedElemRestriction rstr; 203 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 204 &rstr, request); CeedChk(ierr); 205 CeedInt layout[3]; 206 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 207 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 208 CeedScalar max_norm = 0; 209 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 210 211 // Determine active input basis 212 CeedOperatorField *op_fields; 213 CeedQFunctionField *qf_fields; 214 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 215 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 216 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 217 CeedEvalMode *eval_mode_in = NULL; 218 CeedBasis basis_in = NULL; 219 CeedElemRestriction rstr_in = NULL; 220 for (CeedInt i=0; i<num_input_fields; i++) { 221 CeedVector vec; 222 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 223 if (vec == CEED_VECTOR_ACTIVE) { 224 CeedElemRestriction rstr; 225 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 226 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 227 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 228 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 229 if (rstr_in && rstr_in != rstr) 230 // LCOV_EXCL_START 231 return CeedError(ceed, CEED_ERROR_BACKEND, 232 "Multi-field non-composite operator diagonal assembly not supported"); 233 // LCOV_EXCL_STOP 234 rstr_in = rstr; 235 CeedEvalMode eval_mode; 236 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 237 switch (eval_mode) { 
238 case CEED_EVAL_NONE: 239 case CEED_EVAL_INTERP: 240 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 241 eval_mode_in[num_eval_mode_in] = eval_mode; 242 num_eval_mode_in += 1; 243 break; 244 case CEED_EVAL_GRAD: 245 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 246 for (CeedInt d=0; d<dim; d++) 247 eval_mode_in[num_eval_mode_in+d] = eval_mode; 248 num_eval_mode_in += dim; 249 break; 250 case CEED_EVAL_WEIGHT: 251 case CEED_EVAL_DIV: 252 case CEED_EVAL_CURL: 253 break; // Caught by QF Assembly 254 } 255 } 256 } 257 258 // Determine active output basis 259 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 260 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 261 CeedInt num_eval_mode_out = 0; 262 CeedEvalMode *eval_mode_out = NULL; 263 CeedBasis basis_out = NULL; 264 CeedElemRestriction rstr_out = NULL; 265 for (CeedInt i=0; i<num_output_fields; i++) { 266 CeedVector vec; 267 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 268 if (vec == CEED_VECTOR_ACTIVE) { 269 CeedElemRestriction rstr; 270 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 271 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 272 if (rstr_out && rstr_out != rstr) 273 // LCOV_EXCL_START 274 return CeedError(ceed, CEED_ERROR_BACKEND, 275 "Multi-field non-composite operator diagonal assembly not supported"); 276 // LCOV_EXCL_STOP 277 rstr_out = rstr; 278 CeedEvalMode eval_mode; 279 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 280 switch (eval_mode) { 281 case CEED_EVAL_NONE: 282 case CEED_EVAL_INTERP: 283 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 284 eval_mode_out[num_eval_mode_out] = eval_mode; 285 num_eval_mode_out += 1; 286 break; 287 case CEED_EVAL_GRAD: 288 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 289 for 
(CeedInt d=0; d<dim; d++) 290 eval_mode_out[num_eval_mode_out+d] = eval_mode; 291 num_eval_mode_out += dim; 292 break; 293 case CEED_EVAL_WEIGHT: 294 case CEED_EVAL_DIV: 295 case CEED_EVAL_CURL: 296 break; // Caught by QF Assembly 297 } 298 } 299 } 300 301 // Assemble point block diagonal restriction, if needed 302 CeedElemRestriction diag_rstr = rstr_out; 303 if (is_pointblock) { 304 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 305 CeedChk(ierr); 306 } 307 308 // Create diagonal vector 309 CeedVector elem_diag; 310 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 311 CeedChk(ierr); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array; 315 const CeedScalar *assembled_qf_array; 316 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 317 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 318 CeedChk(ierr); 319 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 320 CeedChk(ierr); 321 CeedInt num_elem, num_nodes, num_qpts; 322 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 323 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 324 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 325 // Basis matrices 326 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 327 CeedScalar *identity = NULL; 328 bool evalNone = false; 329 for (CeedInt i=0; i<num_eval_mode_in; i++) 330 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 331 for (CeedInt i=0; i<num_eval_mode_out; i++) 332 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 333 if (evalNone) { 334 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 335 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 336 identity[i*num_nodes+i] = 1.0; 337 } 338 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 339 ierr = CeedBasisGetInterp(basis_out, &interp_out); 
CeedChk(ierr); 340 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 341 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 342 // Compute the diagonal of B^T D B 343 // Each element 344 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 345 for (CeedInt e=0; e<num_elem; e++) { 346 CeedInt d_out = -1; 347 // Each basis eval mode pair 348 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 349 const CeedScalar *bt = NULL; 350 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 351 d_out += 1; 352 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 353 &grad_out[d_out*num_qpts*num_nodes], &bt); 354 CeedInt d_in = -1; 355 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 356 const CeedScalar *b = NULL; 357 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 358 d_in += 1; 359 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 360 &grad_in[d_in*num_qpts*num_nodes], &b); 361 // Each component 362 for (CeedInt c_out=0; c_out<num_comp; c_out++) 363 // Each qpoint/node pair 364 for (CeedInt q=0; q<num_qpts; q++) 365 if (is_pointblock) { 366 // Point Block Diagonal 367 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 368 const CeedScalar qf_value = 369 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 370 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 371 if (fabs(qf_value) > qf_value_bound) 372 for (CeedInt n=0; n<num_nodes; n++) 373 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 374 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 375 } 376 } else { 377 // Diagonal Only 378 const CeedScalar qf_value = 379 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 380 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 381 if (fabs(qf_value) > qf_value_bound) 382 for (CeedInt n=0; n<num_nodes; n++) 383 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 384 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 385 } 386 } 387 } 388 } 389 ierr = 
CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 390 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 391 CeedChk(ierr); 392 393 // Assemble local operator diagonal 394 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 395 assembled, request); CeedChk(ierr); 396 397 // Cleanup 398 if (is_pointblock) { 399 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 400 } 401 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 402 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 403 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 404 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 405 ierr = CeedFree(&identity); CeedChk(ierr); 406 407 return CEED_ERROR_SUCCESS; 408 } 409 410 /** 411 @brief Core logic for assembling composite operator diagonal 412 413 @param[in] op CeedOperator to assemble point block diagonal 414 @param[in] request Address of CeedRequest for non-blocking completion, else 415 CEED_REQUEST_IMMEDIATE 416 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 417 @param[out] assembled CeedVector to store assembled diagonal 418 419 @return An error code: 0 - success, otherwise - failure 420 421 @ref Developer 422 **/ 423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 424 CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 int ierr; 427 CeedInt num_sub; 428 CeedOperator *suboperators; 429 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 430 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 431 for (CeedInt i = 0; i < num_sub; i++) { 432 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 433 is_pointblock, assembled); CeedChk(ierr); 434 } 435 return CEED_ERROR_SUCCESS; 436 } 437 438 /** 439 @brief Build nonzero pattern for non-composite operator 440 441 Users should generally use CeedOperatorLinearAssembleSymbolic() 442 443 @param[in] op CeedOperator to 
assemble nonzero pattern 444 @param[in] offset Offset for number of entries 445 @param[out] rows Row number for each entry 446 @param[out] cols Column number for each entry 447 448 @return An error code: 0 - success, otherwise - failure 449 450 @ref Developer 451 **/ 452 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 453 CeedInt *rows, CeedInt *cols) { 454 int ierr; 455 Ceed ceed = op->ceed; 456 if (op->is_composite) 457 // LCOV_EXCL_START 458 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 459 "Composite operator not supported"); 460 // LCOV_EXCL_STOP 461 462 CeedElemRestriction rstr_in; 463 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 464 CeedSize num_nodes; 465 CeedInt num_elem, elem_size, num_comp; 466 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 467 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 468 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 469 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 470 CeedInt layout_er[3]; 471 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 472 473 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 474 475 // Determine elem_dof relation 476 CeedVector index_vec; 477 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 478 CeedScalar *array; 479 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 480 for (CeedInt i = 0; i < num_nodes; ++i) { 481 array[i] = i; 482 } 483 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 484 CeedVector elem_dof; 485 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 486 CeedChk(ierr); 487 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 488 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 489 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 490 const 
CeedScalar *elem_dof_a; 491 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 492 CeedChk(ierr); 493 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 494 495 // Determine i, j locations for element matrices 496 CeedInt count = 0; 497 for (int e = 0; e < num_elem; ++e) { 498 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 499 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 500 for (int i = 0; i < elem_size; ++i) { 501 for (int j = 0; j < elem_size; ++j) { 502 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 503 (comp_out)*layout_er[1] + e*layout_er[2]; 504 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 505 (comp_in)*layout_er[1] + e*layout_er[2]; 506 507 const CeedInt row = elem_dof_a[elem_dof_index_row]; 508 const CeedInt col = elem_dof_a[elem_dof_index_col]; 509 510 rows[offset + count] = row; 511 cols[offset + count] = col; 512 count++; 513 } 514 } 515 } 516 } 517 } 518 if (count != local_num_entries) 519 // LCOV_EXCL_START 520 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 521 // LCOV_EXCL_STOP 522 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 523 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 524 525 return CEED_ERROR_SUCCESS; 526 } 527 528 /** 529 @brief Assemble nonzero entries for non-composite operator 530 531 Users should generally use CeedOperatorLinearAssemble() 532 533 @param[in] op CeedOperator to assemble 534 @param[out] offset Offest for number of entries 535 @param[out] values Values to assemble into matrix 536 537 @return An error code: 0 - success, otherwise - failure 538 539 @ref Developer 540 **/ 541 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 542 CeedVector values) { 543 int ierr; 544 Ceed ceed = op->ceed; 545 if (op->is_composite) 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 548 "Composite operator not supported"); 549 // LCOV_EXCL_STOP 550 551 // Assemble QFunction 552 
CeedQFunction qf; 553 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 554 CeedVector assembled_qf; 555 CeedElemRestriction rstr_q; 556 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 557 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 558 559 CeedSize qf_length; 560 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 561 562 CeedInt num_input_fields, num_output_fields; 563 CeedOperatorField *input_fields; 564 CeedOperatorField *output_fields; 565 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 566 &num_output_fields, &output_fields); CeedChk(ierr); 567 568 // Determine active input basis 569 CeedQFunctionField *qf_fields; 570 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 571 CeedInt num_eval_mode_in = 0, dim = 1; 572 CeedEvalMode *eval_mode_in = NULL; 573 CeedBasis basis_in = NULL; 574 CeedElemRestriction rstr_in = NULL; 575 for (CeedInt i=0; i<num_input_fields; i++) { 576 CeedVector vec; 577 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 578 if (vec == CEED_VECTOR_ACTIVE) { 579 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 580 CeedChk(ierr); 581 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 582 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 583 CeedChk(ierr); 584 CeedEvalMode eval_mode; 585 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 586 CeedChk(ierr); 587 switch (eval_mode) { 588 case CEED_EVAL_NONE: 589 case CEED_EVAL_INTERP: 590 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 591 eval_mode_in[num_eval_mode_in] = eval_mode; 592 num_eval_mode_in += 1; 593 break; 594 case CEED_EVAL_GRAD: 595 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 596 for (CeedInt d=0; d<dim; d++) { 597 eval_mode_in[num_eval_mode_in+d] = eval_mode; 598 } 599 num_eval_mode_in += dim; 600 break; 601 case CEED_EVAL_WEIGHT: 602 case 
CEED_EVAL_DIV: 603 case CEED_EVAL_CURL: 604 break; // Caught by QF Assembly 605 } 606 } 607 } 608 609 // Determine active output basis 610 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 611 CeedInt num_eval_mode_out = 0; 612 CeedEvalMode *eval_mode_out = NULL; 613 CeedBasis basis_out = NULL; 614 CeedElemRestriction rstr_out = NULL; 615 for (CeedInt i=0; i<num_output_fields; i++) { 616 CeedVector vec; 617 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 618 if (vec == CEED_VECTOR_ACTIVE) { 619 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 620 CeedChk(ierr); 621 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 622 CeedChk(ierr); 623 CeedEvalMode eval_mode; 624 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 625 CeedChk(ierr); 626 switch (eval_mode) { 627 case CEED_EVAL_NONE: 628 case CEED_EVAL_INTERP: 629 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 630 eval_mode_out[num_eval_mode_out] = eval_mode; 631 num_eval_mode_out += 1; 632 break; 633 case CEED_EVAL_GRAD: 634 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 635 for (CeedInt d=0; d<dim; d++) { 636 eval_mode_out[num_eval_mode_out+d] = eval_mode; 637 } 638 num_eval_mode_out += dim; 639 break; 640 case CEED_EVAL_WEIGHT: 641 case CEED_EVAL_DIV: 642 case CEED_EVAL_CURL: 643 break; // Caught by QF Assembly 644 } 645 } 646 } 647 648 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 649 // LCOV_EXCL_START 650 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 651 "Cannot assemble operator with out inputs/outputs"); 652 // LCOV_EXCL_STOP 653 654 CeedInt num_elem, elem_size, num_qpts, num_comp; 655 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 656 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 657 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 658 ierr = 
CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 659 660 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 661 662 // loop over elements and put in data structure 663 const CeedScalar *interp_in, *grad_in; 664 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 665 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 666 667 const CeedScalar *assembled_qf_array; 668 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 669 CeedChk(ierr); 670 671 CeedInt layout_qf[3]; 672 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 673 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 674 675 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 676 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 677 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 678 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 679 num_qpts]; // logically 3-tensor 680 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 681 CeedScalar elem_mat[elem_size * elem_size]; 682 int count = 0; 683 CeedScalar *vals; 684 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 685 for (int e = 0; e < num_elem; ++e) { 686 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 687 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 688 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 689 B_mat_in[ell] = 0.0; 690 } 691 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 692 B_mat_out[ell] = 0.0; 693 } 694 // Store block-diagonal D matrix as collection of small dense blocks 695 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 696 D_mat[ell] = 0.0; 697 } 698 // form element matrix itself (for each block component) 699 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 700 elem_mat[ell] = 0.0; 701 } 702 for (int q = 0; q < num_qpts; 
++q) { 703 for (int n = 0; n < elem_size; ++n) { 704 CeedInt d_in = -1; 705 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 706 const int qq = num_eval_mode_in*q; 707 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 708 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 709 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 710 d_in += 1; 711 B_mat_in[(qq+e_in)*elem_size + n] += 712 grad_in[(d_in*num_qpts+q) * elem_size + n]; 713 } else { 714 // LCOV_EXCL_START 715 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 716 // LCOV_EXCL_STOP 717 } 718 } 719 CeedInt d_out = -1; 720 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 721 const int qq = num_eval_mode_out*q; 722 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 723 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 724 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 725 d_out += 1; 726 B_mat_out[(qq+e_out)*elem_size + n] += 727 grad_in[(d_out*num_qpts+q) * elem_size + n]; 728 } else { 729 // LCOV_EXCL_START 730 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 731 // LCOV_EXCL_STOP 732 } 733 } 734 } 735 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 736 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 737 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 738 +comp_out; 739 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 740 e*layout_qf[2]; 741 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 742 } 743 } 744 } 745 // Compute B^T*D 746 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 747 BTD[ell] = 0.0; 748 } 749 for (int j = 0; j<elem_size; ++j) { 750 for (int q = 0; q<num_qpts; ++q) { 751 int qq = num_eval_mode_out*q; 752 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 753 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 754 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 755 B_mat_out[(qq+ej)*elem_size + j] * 
D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 756 } 757 } 758 } 759 } 760 761 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 762 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 763 764 // put element matrix in coordinate data structure 765 for (int i = 0; i < elem_size; ++i) { 766 for (int j = 0; j < elem_size; ++j) { 767 vals[offset + count] = elem_mat[i*elem_size + j]; 768 count++; 769 } 770 } 771 } 772 } 773 } 774 if (count != local_num_entries) 775 // LCOV_EXCL_START 776 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 777 // LCOV_EXCL_STOP 778 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 779 780 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 781 CeedChk(ierr); 782 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 783 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 784 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 785 786 return CEED_ERROR_SUCCESS; 787 } 788 789 /** 790 @brief Count number of entries for assembled CeedOperator 791 792 @param[in] op CeedOperator to assemble 793 @param[out] num_entries Number of entries in assembled representation 794 795 @return An error code: 0 - success, otherwise - failure 796 797 @ref Utility 798 **/ 799 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 800 CeedInt *num_entries) { 801 int ierr; 802 CeedElemRestriction rstr; 803 CeedInt num_elem, elem_size, num_comp; 804 805 if (op->is_composite) 806 // LCOV_EXCL_START 807 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 808 "Composite operator not supported"); 809 // LCOV_EXCL_STOP 810 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 811 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 812 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 813 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 814 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 815 816 
return CEED_ERROR_SUCCESS; 817 } 818 819 /** 820 @brief Common code for creating a multigrid coarse operator and level 821 transfer operators for a CeedOperator 822 823 @param[in] op_fine Fine grid operator 824 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 825 @param[in] rstr_coarse Coarse grid restriction 826 @param[in] basis_coarse Coarse grid active vector basis 827 @param[in] basis_c_to_f Basis for coarse to fine interpolation 828 @param[out] op_coarse Coarse grid operator 829 @param[out] op_prolong Coarse to fine operator 830 @param[out] op_restrict Fine to coarse operator 831 832 @return An error code: 0 - success, otherwise - failure 833 834 @ref Developer 835 **/ 836 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 837 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 838 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 839 CeedOperator *op_restrict) { 840 int ierr; 841 Ceed ceed; 842 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 843 844 // Check for composite operator 845 bool is_composite; 846 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 847 if (is_composite) 848 // LCOV_EXCL_START 849 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 850 "Automatic multigrid setup for composite operators not supported"); 851 // LCOV_EXCL_STOP 852 853 // Coarse Grid 854 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 855 op_coarse); CeedChk(ierr); 856 CeedElemRestriction rstr_fine = NULL; 857 // -- Clone input fields 858 for (int i = 0; i < op_fine->qf->num_input_fields; i++) { 859 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 860 rstr_fine = op_fine->input_fields[i]->elem_restr; 861 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 862 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 863 CeedChk(ierr); 864 } else { 865 ierr = CeedOperatorSetField(*op_coarse, 
op_fine->input_fields[i]->field_name, 866 op_fine->input_fields[i]->elem_restr, 867 op_fine->input_fields[i]->basis, 868 op_fine->input_fields[i]->vec); CeedChk(ierr); 869 } 870 } 871 // -- Clone output fields 872 for (int i = 0; i < op_fine->qf->num_output_fields; i++) { 873 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 874 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 875 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 876 CeedChk(ierr); 877 } else { 878 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 879 op_fine->output_fields[i]->elem_restr, 880 op_fine->output_fields[i]->basis, 881 op_fine->output_fields[i]->vec); CeedChk(ierr); 882 } 883 } 884 885 // Multiplicity vector 886 CeedVector mult_vec, mult_e_vec; 887 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 888 CeedChk(ierr); 889 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 890 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 891 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 892 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 893 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 894 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 895 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 896 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 897 898 // Restriction 899 CeedInt num_comp; 900 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr); 901 CeedQFunction qf_restrict; 902 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 903 CeedChk(ierr); 904 CeedInt *num_comp_r_data; 905 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 906 num_comp_r_data[0] = num_comp; 907 CeedQFunctionContext ctx_r; 908 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 909 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 910 sizeof(*num_comp_r_data), num_comp_r_data); 
911 CeedChk(ierr); 912 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 913 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 914 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 915 CeedChk(ierr); 916 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 917 CeedChk(ierr); 918 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 919 CEED_EVAL_INTERP); CeedChk(ierr); 920 921 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 922 CEED_QFUNCTION_NONE, op_restrict); 923 CeedChk(ierr); 924 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 925 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 926 CeedChk(ierr); 927 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 928 CEED_BASIS_COLLOCATED, mult_vec); 929 CeedChk(ierr); 930 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 931 CEED_VECTOR_ACTIVE); CeedChk(ierr); 932 933 // Prolongation 934 CeedQFunction qf_prolong; 935 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 936 CeedChk(ierr); 937 CeedInt *num_comp_p_data; 938 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 939 num_comp_p_data[0] = num_comp; 940 CeedQFunctionContext ctx_p; 941 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 942 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 943 sizeof(*num_comp_p_data), num_comp_p_data); 944 CeedChk(ierr); 945 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 946 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 947 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 948 CeedChk(ierr); 949 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 950 CeedChk(ierr); 951 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 952 CeedChk(ierr); 953 954 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 
955 CEED_QFUNCTION_NONE, op_prolong); 956 CeedChk(ierr); 957 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 958 CEED_VECTOR_ACTIVE); CeedChk(ierr); 959 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 960 CEED_BASIS_COLLOCATED, mult_vec); 961 CeedChk(ierr); 962 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 963 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 964 CeedChk(ierr); 965 966 // Cleanup 967 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 968 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 969 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 970 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 971 return CEED_ERROR_SUCCESS; 972 } 973 974 /** 975 @brief Build 1D mass matrix and Laplacian with perturbation 976 977 @param[in] interp_1d Interpolation matrix in one dimension 978 @param[in] grad_1d Gradient matrix in one dimension 979 @param[in] q_weight_1d Quadrature weights in one dimension 980 @param[in] P_1d Number of basis nodes in one dimension 981 @param[in] Q_1d Number of quadrature points in one dimension 982 @param[in] dim Dimension of basis 983 @param[out] mass Assembled mass matrix in one dimension 984 @param[out] laplace Assembled perturbed Laplacian in one dimension 985 986 @return An error code: 0 - success, otherwise - failure 987 988 @ref Developer 989 **/ 990 CeedPragmaOptimizeOff 991 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 992 const CeedScalar *grad_1d, 993 const CeedScalar *q_weight_1d, CeedInt P_1d, 994 CeedInt Q_1d, CeedInt dim, 995 CeedScalar *mass, CeedScalar *laplace) { 996 997 for (CeedInt i=0; i<P_1d; i++) 998 for (CeedInt j=0; j<P_1d; j++) { 999 CeedScalar sum = 0.0; 1000 for (CeedInt k=0; k<Q_1d; k++) 1001 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 1002 mass[i+j*P_1d] = sum; 1003 } 1004 // -- Laplacian 1005 for (CeedInt i=0; i<P_1d; i++) 1006 for (CeedInt j=0; j<P_1d; j++) { 1007 CeedScalar sum = 0.0; 1008 for 
(CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Small diagonal perturbation keeps the 1D Laplacian strictly positive definite
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Backend API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorBackend
/// @{

/**
  @brief Create object holding CeedQFunction assembly data for CeedOperator

  @param[in] ceed  A Ceed object where the CeedQFunctionAssemblyData will be created
  @param[out] data Address of the variable where the newly created
                     CeedQFunctionAssemblyData will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataCreate(Ceed ceed,
                                    CeedQFunctionAssemblyData *data) {
  int ierr;

  ierr = CeedCalloc(1, data); CeedChk(ierr);
  (*data)->ref_count = 1;
  // Hold a reference to the Ceed for the lifetime of the assembly data
  (*data)->ceed = ceed;
  ierr = CeedReference(ceed); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Increment the reference counter for a CeedQFunctionAssemblyData

  @param data  CeedQFunctionAssemblyData to increment the reference counter

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
  data->ref_count++;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
           be destroyed with `CeedQFunctionAssemblyDataDestroy()`;
           Note: If `*data_copy` is non-NULL, then it is assumed that
           `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
           CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
           the only reference to this CeedQFunctionAssemblyData.

  @param data            CeedQFunctionAssemblyData to copy reference to
  @param[out] data_copy  Variable to store copied reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
    CeedQFunctionAssemblyData *data_copy) {
  int ierr;

  // Reference first so data survives even if *data_copy aliases it
  ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
  ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
  *data_copy = data;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get setup status for internal objects for CeedQFunctionAssemblyData

  @param[in] data       CeedQFunctionAssemblyData to retrieve status
  @param[out] is_setup  Boolean flag for setup status

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
                                     bool *is_setup) {
  *is_setup = data->is_setup;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set internal objects for CeedQFunctionAssemblyData

  @param[in] data  CeedQFunctionAssemblyData to set objects
  @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
  @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector vec, CeedElemRestriction rstr) {
  int ierr;

  // Takes ownership of vec/rstr; release any previously held objects first
  ierr = CeedVectorDestroy(&data->vec); CeedChk(ierr);
  data->vec = vec;

  ierr = CeedElemRestrictionDestroy(&data->rstr); CeedChk(ierr);
  data->rstr = rstr;

  data->is_setup = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get internal objects for CeedQFunctionAssemblyData

  @param[in] data   CeedQFunctionAssemblyData to get objects from
  @param[out] vec   Variable to store CeedVector holding assembled CeedQFunction
  @param[out] rstr  Variable to store CeedElemRestriction for the CeedVector

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector *vec, CeedElemRestriction *rstr) {
  if (!data->is_setup)
    // LCOV_EXCL_START
    return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
                     "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
  // LCOV_EXCL_STOP

  // Note: reference counts are not incremented here; caller borrows the objects
  *vec = data->vec;
  *rstr = data->rstr;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Destroy CeedQFunctionAssemblyData

  @param[out] data  CeedQFunctionAssemblyData to destroy

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
  int ierr;

  // Decrement reference count; only free on the final reference
  if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;

  ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
  ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);

  ierr = CeedFree(data); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
  The vector 'assembled' is of shape
    [num_elements, num_input_fields, num_output_fields, num_quad_points]
  and contains column-major matrices representing the action of the
  CeedQFunction for a corresponding quadrature point on an element. Inputs and
  outputs are in the order provided by the user when adding CeedOperator fields.
  For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
  'v', provided in that order, would result in an assembled QFunction that
  consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
  on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedQFunction at
                           quadrature points
  @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
                           CeedQFunction
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
                                        CeedElemRestriction *rstr,
                                        CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunction) {
    ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
    CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
           rstr, request); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble CeedQFunction and store result internally. Return copied
           references of stored data to the caller. Caller is responsible for
           ownership and destruction of the copied references. See also
           @ref CeedOperatorLinearAssembleQFunction

  @param op         CeedOperator to assemble CeedQFunction
  @param assembled  CeedVector to store assembled CeedQFunction at
                      quadrature points
  @param rstr       CeedElemRestriction for CeedVector containing assembled
                      CeedQFunction
  @param request    Address of CeedRequest for non-blocking completion, else
                      @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    bool qf_assembled_is_setup;
    CeedVector assembled_vec;
    CeedElemRestriction assembled_rstr;

    ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
           &qf_assembled_is_setup); CeedChk(ierr);
    if (qf_assembled_is_setup) {
      // Subsequent assembly; update the cached objects in place
      ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
             &assembled_rstr); CeedChk(ierr);
      ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
             request); CeedChk(ierr);
    } else {
      // First assembly; build and cache the assembly objects
      ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
             request); CeedChk(ierr);
      ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
             assembled_rstr); CeedChk(ierr);
    }
    // Copy reference to internally held copy
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if
(!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Backend only provides the additive version; zero first, then add
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
                                          CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Backend only provides the additive version; zero first, then add
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
  @param[in] op            CeedOperator to assemble
  @param[out] num_entries  Number of entries in coordinate nonzero pattern
  @param[out] rows         Row number for each entry
  @param[out] cols         Column number for each entry

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    // Total pattern is the concatenation of all sub-operator patterns
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op       CeedOperator to assemble
  @param[out] values  Values to assemble into matrix

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // Values are written in the ordering produced by
  //   CeedOperatorLinearAssembleSymbolic()
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level
transfer operators 1708 for a CeedOperator, creating the prolongation basis from the 1709 fine and coarse grid interpolation 1710 1711 Note: Calling this function asserts that setup is complete 1712 and sets the CeedOperator as immutable. 1713 1714 @param[in] op_fine Fine grid operator 1715 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1716 @param[in] rstr_coarse Coarse grid restriction 1717 @param[in] basis_coarse Coarse grid active vector basis 1718 @param[out] op_coarse Coarse grid operator 1719 @param[out] op_prolong Coarse to fine operator 1720 @param[out] op_restrict Fine to coarse operator 1721 1722 @return An error code: 0 - success, otherwise - failure 1723 1724 @ref User 1725 **/ 1726 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1727 CeedVector p_mult_fine, 1728 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1729 CeedOperator *op_coarse, CeedOperator *op_prolong, 1730 CeedOperator *op_restrict) { 1731 int ierr; 1732 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1733 Ceed ceed; 1734 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1735 1736 // Check for compatible quadrature spaces 1737 CeedBasis basis_fine; 1738 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1739 CeedInt Q_f, Q_c; 1740 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1741 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1742 if (Q_f != Q_c) 1743 // LCOV_EXCL_START 1744 return CeedError(ceed, CEED_ERROR_DIMENSION, 1745 "Bases must have compatible quadrature spaces"); 1746 // LCOV_EXCL_STOP 1747 1748 // Coarse to fine basis 1749 CeedInt P_f, P_c, Q = Q_f; 1750 bool is_tensor_f, is_tensor_c; 1751 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1752 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1753 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1754 if (is_tensor_f && is_tensor_c) { 1755 ierr = 
CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1756 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1757 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1758 } else if (!is_tensor_f && !is_tensor_c) { 1759 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1760 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1761 } else { 1762 // LCOV_EXCL_START 1763 return CeedError(ceed, CEED_ERROR_MINOR, 1764 "Bases must both be tensor or non-tensor"); 1765 // LCOV_EXCL_STOP 1766 } 1767 1768 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1769 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1770 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1771 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1772 if (is_tensor_f) { 1773 memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]); 1774 memcpy(interp_c, basis_coarse->interp_1d, 1775 Q*P_c*sizeof basis_coarse->interp_1d[0]); 1776 } else { 1777 memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]); 1778 memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]); 1779 } 1780 1781 // -- QR Factorization, interp_f = Q R 1782 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1783 1784 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1785 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1786 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1787 1788 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1789 for (CeedInt j=0; j<P_c; j++) { // Column j 1790 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1791 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1792 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1793 for (CeedInt k=i+1; k<P_f; k++) 1794 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1795 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1796 } 1797 } 1798 ierr = CeedFree(&tau); CeedChk(ierr); 1799 ierr = 
CeedFree(&interp_c); CeedChk(ierr); 1800 ierr = CeedFree(&interp_f); CeedChk(ierr); 1801 1802 // Complete with interp_c_to_f versions of code 1803 if (is_tensor_f) { 1804 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1805 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1806 CeedChk(ierr); 1807 } else { 1808 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1809 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1810 CeedChk(ierr); 1811 } 1812 1813 // Cleanup 1814 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1815 return CEED_ERROR_SUCCESS; 1816 } 1817 1818 /** 1819 @brief Create a multigrid coarse operator and level transfer operators 1820 for a CeedOperator with a tensor basis for the active basis 1821 1822 Note: Calling this function asserts that setup is complete 1823 and sets the CeedOperator as immutable. 1824 1825 @param[in] op_fine Fine grid operator 1826 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1827 @param[in] rstr_coarse Coarse grid restriction 1828 @param[in] basis_coarse Coarse grid active vector basis 1829 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1830 @param[out] op_coarse Coarse grid operator 1831 @param[out] op_prolong Coarse to fine operator 1832 @param[out] op_restrict Fine to coarse operator 1833 1834 @return An error code: 0 - success, otherwise - failure 1835 1836 @ref User 1837 **/ 1838 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1839 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1840 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1841 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1842 int ierr; 1843 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1844 Ceed ceed; 1845 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1846 1847 // Check for compatible quadrature spaces 1848 CeedBasis basis_fine; 1849 
ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1850 CeedInt Q_f, Q_c; 1851 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1852 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1853 if (Q_f != Q_c) 1854 // LCOV_EXCL_START 1855 return CeedError(ceed, CEED_ERROR_DIMENSION, 1856 "Bases must have compatible quadrature spaces"); 1857 // LCOV_EXCL_STOP 1858 1859 // Coarse to fine basis 1860 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1861 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1862 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1863 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1864 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1865 CeedChk(ierr); 1866 P_1d_c = dim == 1 ? num_nodes_c : 1867 dim == 2 ? sqrt(num_nodes_c) : 1868 cbrt(num_nodes_c); 1869 CeedScalar *q_ref, *q_weight, *grad; 1870 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1871 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1872 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1873 CeedBasis basis_c_to_f; 1874 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1875 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1876 CeedChk(ierr); 1877 ierr = CeedFree(&q_ref); CeedChk(ierr); 1878 ierr = CeedFree(&q_weight); CeedChk(ierr); 1879 ierr = CeedFree(&grad); CeedChk(ierr); 1880 1881 // Core code 1882 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1883 basis_coarse, basis_c_to_f, op_coarse, 1884 op_prolong, op_restrict); 1885 CeedChk(ierr); 1886 return CEED_ERROR_SUCCESS; 1887 } 1888 1889 /** 1890 @brief Create a multigrid coarse operator and level transfer operators 1891 for a CeedOperator with a non-tensor basis for the active vector 1892 1893 Note: Calling this function asserts that setup is complete 1894 and sets the CeedOperator as immutable. 
1895 1896 @param[in] op_fine Fine grid operator 1897 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1898 @param[in] rstr_coarse Coarse grid restriction 1899 @param[in] basis_coarse Coarse grid active vector basis 1900 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1901 @param[out] op_coarse Coarse grid operator 1902 @param[out] op_prolong Coarse to fine operator 1903 @param[out] op_restrict Fine to coarse operator 1904 1905 @return An error code: 0 - success, otherwise - failure 1906 1907 @ref User 1908 **/ 1909 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1910 CeedVector p_mult_fine, 1911 CeedElemRestriction rstr_coarse, 1912 CeedBasis basis_coarse, 1913 const CeedScalar *interp_c_to_f, 1914 CeedOperator *op_coarse, 1915 CeedOperator *op_prolong, 1916 CeedOperator *op_restrict) { 1917 int ierr; 1918 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1919 Ceed ceed; 1920 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1921 1922 // Check for compatible quadrature spaces 1923 CeedBasis basis_fine; 1924 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1925 CeedInt Q_f, Q_c; 1926 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1927 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1928 if (Q_f != Q_c) 1929 // LCOV_EXCL_START 1930 return CeedError(ceed, CEED_ERROR_DIMENSION, 1931 "Bases must have compatible quadrature spaces"); 1932 // LCOV_EXCL_STOP 1933 1934 // Coarse to fine basis 1935 CeedElemTopology topo; 1936 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1937 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1938 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1939 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1940 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 1941 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1942 
CeedChk(ierr); 1943 CeedScalar *q_ref, *q_weight, *grad; 1944 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 1945 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 1946 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 1947 CeedBasis basis_c_to_f; 1948 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 1949 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1950 CeedChk(ierr); 1951 ierr = CeedFree(&q_ref); CeedChk(ierr); 1952 ierr = CeedFree(&q_weight); CeedChk(ierr); 1953 ierr = CeedFree(&grad); CeedChk(ierr); 1954 1955 // Core code 1956 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1957 basis_coarse, basis_c_to_f, op_coarse, 1958 op_prolong, op_restrict); 1959 CeedChk(ierr); 1960 return CEED_ERROR_SUCCESS; 1961 } 1962 1963 /** 1964 @brief Build a FDM based approximate inverse for each element for a 1965 CeedOperator 1966 1967 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 1968 Method based approximate inverse. This function obtains the simultaneous 1969 diagonalization for the 1D mass and Laplacian operators, 1970 M = V^T V, K = V^T S V. 1971 The assembled QFunction is used to modify the eigenvalues from simultaneous 1972 diagonalization and obtain an approximate inverse of the form 1973 V^T S^hat V. The CeedOperator must be linear and non-composite. The 1974 associated CeedQFunction must therefore also be linear. 1975 1976 Note: Calling this function asserts that setup is complete 1977 and sets the CeedOperator as immutable. 
1978 1979 @param op CeedOperator to create element inverses 1980 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 1981 for each element 1982 @param request Address of CeedRequest for non-blocking completion, else 1983 @ref CEED_REQUEST_IMMEDIATE 1984 1985 @return An error code: 0 - success, otherwise - failure 1986 1987 @ref User 1988 **/ 1989 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 1990 CeedRequest *request) { 1991 int ierr; 1992 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1993 1994 // Use backend version, if available 1995 if (op->CreateFDMElementInverse) { 1996 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 1997 return CEED_ERROR_SUCCESS; 1998 } else { 1999 // Check for valid fallback resource 2000 const char *resource, *fallback_resource; 2001 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2002 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2003 CeedChk(ierr); 2004 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2005 // Fallback to reference Ceed 2006 if (!op->op_fallback) { 2007 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2008 } 2009 // Assemble 2010 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2011 CeedChk(ierr); 2012 return CEED_ERROR_SUCCESS; 2013 } 2014 } 2015 2016 // Interface implementation 2017 Ceed ceed, ceed_parent; 2018 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2019 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2020 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2021 CeedQFunction qf; 2022 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2023 2024 // Determine active input basis 2025 bool interp = false, grad = false; 2026 CeedBasis basis = NULL; 2027 CeedElemRestriction rstr = NULL; 2028 CeedOperatorField *op_fields; 2029 CeedQFunctionField *qf_fields; 2030 CeedInt num_input_fields; 2031 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2032 CeedChk(ierr); 2033 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2034 for (CeedInt i=0; i<num_input_fields; i++) { 2035 CeedVector vec; 2036 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2037 if (vec == CEED_VECTOR_ACTIVE) { 2038 CeedEvalMode eval_mode; 2039 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2040 interp = interp || eval_mode == CEED_EVAL_INTERP; 2041 grad = grad || eval_mode == CEED_EVAL_GRAD; 2042 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2043 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2044 } 2045 } 2046 if (!basis) 2047 // LCOV_EXCL_START 2048 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2049 // LCOV_EXCL_STOP 2050 CeedSize l_size = 1; 2051 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2052 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2053 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2054 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2055 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2056 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2057 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2058 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2059 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2060 2061 // Build and diagonalize 1D Mass and Laplacian 2062 
bool tensor_basis; 2063 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2064 if (!tensor_basis) 2065 // LCOV_EXCL_START 2066 return CeedError(ceed, CEED_ERROR_BACKEND, 2067 "FDMElementInverse only supported for tensor " 2068 "bases"); 2069 // LCOV_EXCL_STOP 2070 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2071 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2072 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2073 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2074 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2075 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2076 // -- Build matrices 2077 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2078 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2079 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2080 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2081 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2082 mass, laplace); CeedChk(ierr); 2083 2084 // -- Diagonalize 2085 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2086 CeedChk(ierr); 2087 ierr = CeedFree(&mass); CeedChk(ierr); 2088 ierr = CeedFree(&laplace); CeedChk(ierr); 2089 for (CeedInt i=0; i<P_1d; i++) 2090 for (CeedInt j=0; j<P_1d; j++) 2091 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2092 ierr = CeedFree(&x); CeedChk(ierr); 2093 2094 // Assemble QFunction 2095 CeedVector assembled; 2096 CeedElemRestriction rstr_qf; 2097 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2098 &rstr_qf, request); CeedChk(ierr); 2099 CeedInt layout[3]; 2100 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2101 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2102 CeedScalar max_norm = 0; 2103 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2104 2105 // Calculate element averages 2106 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2107 CeedScalar 
*elem_avg; 2108 const CeedScalar *assembled_array, *q_weight_array; 2109 CeedVector q_weight; 2110 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2111 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2112 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2113 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2114 CeedChk(ierr); 2115 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2116 CeedChk(ierr); 2117 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2118 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2119 for (CeedInt e=0; e<num_elem; e++) { 2120 CeedInt count = 0; 2121 for (CeedInt q=0; q<num_qpts; q++) 2122 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2123 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2124 qf_value_bound) { 2125 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2126 q_weight_array[q]; 2127 count++; 2128 } 2129 if (count) { 2130 elem_avg[e] /= count; 2131 } else { 2132 elem_avg[e] = 1.0; 2133 } 2134 } 2135 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2136 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2137 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2138 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2139 2140 // Build FDM diagonal 2141 CeedVector q_data; 2142 CeedScalar *q_data_array, *fdm_diagonal; 2143 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2144 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2145 for (CeedInt c=0; c<num_comp; c++) 2146 for (CeedInt n=0; n<elem_size; n++) { 2147 if (interp) 2148 fdm_diagonal[c*elem_size + n] = 1.0; 2149 if (grad) 2150 for (CeedInt d=0; d<dim; d++) { 2151 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2152 fdm_diagonal[c*elem_size + n] += lambda[i]; 2153 } 2154 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2155 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2156 } 2157 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2158 CeedChk(ierr); 2159 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2160 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2161 CeedChk(ierr); 2162 for (CeedInt e=0; e<num_elem; e++) 2163 for (CeedInt c=0; c<num_comp; c++) 2164 for (CeedInt n=0; n<elem_size; n++) 2165 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2166 fdm_diagonal[c*elem_size + n]); 2167 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2168 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2169 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2170 2171 // Setup FDM operator 2172 // -- Basis 2173 CeedBasis fdm_basis; 2174 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2175 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2176 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2177 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2178 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2179 fdm_interp, grad_dummy, q_ref_dummy, 2180 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2181 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2182 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2183 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2184 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2185 ierr = CeedFree(&lambda); CeedChk(ierr); 2186 2187 // -- Restriction 2188 CeedElemRestriction rstr_qd_i; 2189 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2190 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2191 num_comp, num_elem*num_comp*elem_size, 2192 strides, &rstr_qd_i); CeedChk(ierr); 2193 // -- QFunction 2194 CeedQFunction qf_fdm; 2195 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2196 CeedChk(ierr); 2197 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2198 
CeedChk(ierr); 2199 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2200 CeedChk(ierr); 2201 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2202 CeedChk(ierr); 2203 // -- QFunction context 2204 CeedInt *num_comp_data; 2205 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2206 num_comp_data[0] = num_comp; 2207 CeedQFunctionContext ctx_fdm; 2208 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2209 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2210 sizeof(*num_comp_data), num_comp_data); 2211 CeedChk(ierr); 2212 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2213 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2214 // -- Operator 2215 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2216 CeedChk(ierr); 2217 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2218 CeedChk(ierr); 2219 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2220 q_data); CeedChk(ierr); 2221 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2222 CeedChk(ierr); 2223 2224 // Cleanup 2225 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2226 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2227 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2228 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2229 2230 return CEED_ERROR_SUCCESS; 2231 } 2232 2233 /// @} 2234