1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 ierr = 
CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 79 &op_ref->qf_assembled); CeedChk(ierr); 80 op->op_fallback = op_ref; 81 82 // Clone QF 83 CeedQFunction qf_ref; 84 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 85 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 86 qf_ref->data = NULL; 87 qf_ref->ceed = ceed_ref; 88 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 89 op_ref->qf = qf_ref; 90 op->qf_fallback = qf_ref; 91 return CEED_ERROR_SUCCESS; 92 } 93 94 /** 95 @brief Select correct basis matrix pointer based on CeedEvalMode 96 97 @param[in] eval_mode Current basis evaluation mode 98 @param[in] identity Pointer to identity matrix 99 @param[in] interp Pointer to interpolation matrix 100 @param[in] grad Pointer to gradient matrix 101 @param[out] basis_ptr Basis pointer to set 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = 
CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 148 *pointblock_offsets; 149 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 151 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 152 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 153 CeedInt shift = num_comp; 154 if (comp_stride != 1) 155 shift *= num_comp; 156 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 157 CeedChk(ierr); 158 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 159 pointblock_offsets[i] = offsets[i]*shift; 160 if (pointblock_offsets[i] > max) 161 max = pointblock_offsets[i]; 162 } 163 164 // Create new restriction 165 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 166 1, max + num_comp*num_comp, CEED_MEM_HOST, 167 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 168 CeedChk(ierr); 169 170 // Cleanup 171 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 172 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else 181 CEED_REQUEST_IMMEDIATE 182 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 183 @param[out] assembled CeedVector to store assembled diagonal 184 185 @return An error code: 0 - success, otherwise - failure 186 187 @ref Developer 188 **/ 189 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 190 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 191 int ierr; 192 Ceed ceed; 193 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 194 195 // 
/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf,
         &rstr, request); CeedChk(ierr);
  // E-vector layout of the assembled QFunction data (strides for q, field, elem)
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max-norm used below to skip numerically-negligible QFunction entries
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis; collect one eval mode per field component
  // (GRAD expands to one mode per dimension)
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share a single restriction
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // One eval mode entry per spatial dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis (same collection scheme as inputs)
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector (element-local storage for the diagonal)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array;
  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    // CEED_EVAL_NONE is treated as multiplication by an identity "basis"
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Entries below max_norm*100*CEED_EPSILON are treated as zero and skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // d_out tracks which dimension slice of grad_out to use for GRAD modes
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal — full c_out x c_in block per node
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only — c_in == c_out
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                  num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);

  // Assemble local operator diagonal (scatter-add element diagonals)
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling composite operator diagonal

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
  CeedOperator op, CeedRequest *request, const bool is_pointblock,
  CeedVector assembled) {
  int ierr;
  CeedInt num_sub;
  CeedOperator *suboperators;
  ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
  ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
  // Sum each suboperator's diagonal contribution into the shared vector
  for (CeedInt i = 0; i < num_sub; i++) {
    ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
           is_pointblock, assembled); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}
assemble nonzero pattern 444 @param[in] offset Offset for number of entries 445 @param[out] rows Row number for each entry 446 @param[out] cols Column number for each entry 447 448 @return An error code: 0 - success, otherwise - failure 449 450 @ref Developer 451 **/ 452 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 453 CeedInt *rows, CeedInt *cols) { 454 int ierr; 455 Ceed ceed = op->ceed; 456 if (op->is_composite) 457 // LCOV_EXCL_START 458 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 459 "Composite operator not supported"); 460 // LCOV_EXCL_STOP 461 462 CeedElemRestriction rstr_in; 463 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 464 CeedSize num_nodes; 465 CeedInt num_elem, elem_size, num_comp; 466 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 467 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 468 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 469 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 470 CeedInt layout_er[3]; 471 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 472 473 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 474 475 // Determine elem_dof relation 476 CeedVector index_vec; 477 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 478 CeedScalar *array; 479 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 480 for (CeedInt i = 0; i < num_nodes; ++i) { 481 array[i] = i; 482 } 483 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 484 CeedVector elem_dof; 485 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 486 CeedChk(ierr); 487 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 488 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 489 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 490 const 
CeedScalar *elem_dof_a; 491 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 492 CeedChk(ierr); 493 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 494 495 // Determine i, j locations for element matrices 496 CeedInt count = 0; 497 for (int e = 0; e < num_elem; ++e) { 498 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 499 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 500 for (int i = 0; i < elem_size; ++i) { 501 for (int j = 0; j < elem_size; ++j) { 502 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 503 (comp_out)*layout_er[1] + e*layout_er[2]; 504 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 505 (comp_in)*layout_er[1] + e*layout_er[2]; 506 507 const CeedInt row = elem_dof_a[elem_dof_index_row]; 508 const CeedInt col = elem_dof_a[elem_dof_index_col]; 509 510 rows[offset + count] = row; 511 cols[offset + count] = col; 512 count++; 513 } 514 } 515 } 516 } 517 } 518 if (count != local_num_entries) 519 // LCOV_EXCL_START 520 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 521 // LCOV_EXCL_STOP 522 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 523 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 524 525 return CEED_ERROR_SUCCESS; 526 } 527 528 /** 529 @brief Assemble nonzero entries for non-composite operator 530 531 Users should generally use CeedOperatorLinearAssemble() 532 533 @param[in] op CeedOperator to assemble 534 @param[out] offset Offest for number of entries 535 @param[out] values Values to assemble into matrix 536 537 @return An error code: 0 - success, otherwise - failure 538 539 @ref Developer 540 **/ 541 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 542 CeedVector values) { 543 int ierr; 544 Ceed ceed = op->ceed; 545 if (op->is_composite) 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 548 "Composite operator not supported"); 549 // LCOV_EXCL_STOP 550 551 // Assemble QFunction 552 
/**
  @brief Assemble nonzero entries for non-composite operator

  Users should generally use CeedOperatorLinearAssemble()

  @param[in] op       CeedOperator to assemble
  @param[in] offset   Offset for number of entries
  @param[out] values  Values to assemble into matrix

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
                                      CeedVector values) {
  int ierr;
  Ceed ceed = op->ceed;
  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP

  // Assemble QFunction
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr_q;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
           op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);

  // NOTE(review): qf_length is retrieved but not used below — confirm intent
  CeedSize qf_length;
  ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);

  CeedInt num_input_fields, num_output_fields;
  CeedOperatorField *input_fields;
  CeedOperatorField *output_fields;
  ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
                               &num_output_fields, &output_fields); CeedChk(ierr);

  // Determine active input basis; GRAD expands to one eval mode per dimension
  CeedQFunctionField *qf_fields;
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
      CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        }
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
      CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        }
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Cannot assemble operator with out inputs/outputs");
  // LCOV_EXCL_STOP

  CeedInt num_elem, elem_size, num_qpts, num_comp;
  ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);

  CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  // loop over elements and put in data structure
  // NOTE(review): only the *input* basis matrices are fetched here; the output
  // B matrix below is also built from interp_in/grad_in, which assumes the
  // input and output bases agree — confirm for mixed-basis operators
  const CeedScalar *interp_in, *grad_in;
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);

  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);

  CeedInt layout_qf[3];
  ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);

  // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
  // NOTE(review): these are stack VLAs sized from runtime quantities — large
  // elements may risk stack overflow; confirm acceptable bounds
  CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
  CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
  CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
                   num_qpts]; // logically 3-tensor
  CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
  CeedScalar elem_mat[elem_size * elem_size];
  int count = 0;
  CeedScalar *vals;
  ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
  for (int e = 0; e < num_elem; ++e) {
    for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
      for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
        // Reset per-(element, component-pair) scratch matrices
        for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
          B_mat_in[ell] = 0.0;
        }
        for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
          B_mat_out[ell] = 0.0;
        }
        // Store block-diagonal D matrix as collection of small dense blocks
        for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
          D_mat[ell] = 0.0;
        }
        // form element matrix itself (for each block component)
        for (int ell = 0; ell < elem_size*elem_size; ++ell) {
          elem_mat[ell] = 0.0;
        }
        for (int q = 0; q < num_qpts; ++q) {
          for (int n = 0; n < elem_size; ++n) {
            // Build input B matrix rows for this qpoint/node
            CeedInt d_in = -1;
            for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
              const int qq = num_eval_mode_in*q;
              if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
                B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
                d_in += 1;
                B_mat_in[(qq+e_in)*elem_size + n] +=
                  grad_in[(d_in*num_qpts+q) * elem_size + n];
              } else {
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
            // Build output B matrix rows (see NOTE above: uses input matrices)
            CeedInt d_out = -1;
            for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
              const int qq = num_eval_mode_out*q;
              if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
                B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
                d_out += 1;
                B_mat_out[(qq+e_out)*elem_size + n] +=
                  grad_in[(d_out*num_qpts+q) * elem_size + n];
              } else {
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
          }
          // Gather the QFunction block for this component pair at qpoint q
          for (int ei = 0; ei < num_eval_mode_out; ++ei) {
            for (int ej = 0; ej < num_eval_mode_in; ++ej) {
              const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
                                          +comp_out;
              const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
                                e*layout_qf[2];
              D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
            }
          }
        }
        // Compute B^T*D
        for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
          BTD[ell] = 0.0;
        }
        for (int j = 0; j<elem_size; ++j) {
          for (int q = 0; q<num_qpts; ++q) {
            int qq = num_eval_mode_out*q;
            for (int ei = 0; ei < num_eval_mode_in; ++ei) {
              for (int ej = 0; ej < num_eval_mode_out; ++ej) {
                BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
                  B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
              }
            }
          }
        }

        // elem_mat = (B^T D) * B
        ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
                                  elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);

        // put element matrix in coordinate data structure
        for (int i = 0; i < elem_size; ++i) {
          for (int j = 0; j < elem_size; ++j) {
            vals[offset + count] = elem_mat[i*elem_size + j];
            count++;
          }
        }
      }
    }
  }
  if (count != local_num_entries)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
  // LCOV_EXCL_STOP
  ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);

  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Count number of entries for assembled CeedOperator

  @param[in] op            CeedOperator to assemble
  @param[out] num_entries  Number of entries in assembled representation

  @return An error code: 0 - success, otherwise - failure

  @ref Utility
**/
static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
    CeedInt *num_entries) {
  int ierr;
  CeedElemRestriction rstr;
  CeedInt num_elem, elem_size, num_comp;

  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP
  ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  // Dense element matrices: (elem_size*num_comp)^2 entries per element
  *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

return CEED_ERROR_SUCCESS; 817 } 818 819 /** 820 @brief Common code for creating a multigrid coarse operator and level 821 transfer operators for a CeedOperator 822 823 @param[in] op_fine Fine grid operator 824 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 825 @param[in] rstr_coarse Coarse grid restriction 826 @param[in] basis_coarse Coarse grid active vector basis 827 @param[in] basis_c_to_f Basis for coarse to fine interpolation 828 @param[out] op_coarse Coarse grid operator 829 @param[out] op_prolong Coarse to fine operator 830 @param[out] op_restrict Fine to coarse operator 831 832 @return An error code: 0 - success, otherwise - failure 833 834 @ref Developer 835 **/ 836 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 837 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 838 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 839 CeedOperator *op_restrict) { 840 int ierr; 841 Ceed ceed; 842 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 843 844 // Check for composite operator 845 bool is_composite; 846 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 847 if (is_composite) 848 // LCOV_EXCL_START 849 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 850 "Automatic multigrid setup for composite operators not supported"); 851 // LCOV_EXCL_STOP 852 853 // Coarse Grid 854 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 855 op_coarse); CeedChk(ierr); 856 CeedElemRestriction rstr_fine = NULL; 857 // -- Clone input fields 858 for (int i = 0; i < op_fine->qf->num_input_fields; i++) { 859 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 860 rstr_fine = op_fine->input_fields[i]->elem_restr; 861 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 862 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 863 CeedChk(ierr); 864 } else { 865 ierr = CeedOperatorSetField(*op_coarse, 
op_fine->input_fields[i]->field_name, 866 op_fine->input_fields[i]->elem_restr, 867 op_fine->input_fields[i]->basis, 868 op_fine->input_fields[i]->vec); CeedChk(ierr); 869 } 870 } 871 // -- Clone output fields 872 for (int i = 0; i < op_fine->qf->num_output_fields; i++) { 873 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 874 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 875 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 876 CeedChk(ierr); 877 } else { 878 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 879 op_fine->output_fields[i]->elem_restr, 880 op_fine->output_fields[i]->basis, 881 op_fine->output_fields[i]->vec); CeedChk(ierr); 882 } 883 } 884 // -- Clone QFunctionAssemblyData 885 ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, 886 &(*op_coarse)->qf_assembled); CeedChk(ierr); 887 888 // Multiplicity vector 889 CeedVector mult_vec, mult_e_vec; 890 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 891 CeedChk(ierr); 892 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 893 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 894 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 895 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 896 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 897 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 898 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 899 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 900 901 // Restriction 902 CeedInt num_comp; 903 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr); 904 CeedQFunction qf_restrict; 905 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 906 CeedChk(ierr); 907 CeedInt *num_comp_r_data; 908 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 909 num_comp_r_data[0] = num_comp; 910 CeedQFunctionContext ctx_r; 911 ierr = 
CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 912 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 913 sizeof(*num_comp_r_data), num_comp_r_data); 914 CeedChk(ierr); 915 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 916 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 917 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 918 CeedChk(ierr); 919 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 920 CeedChk(ierr); 921 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 922 CEED_EVAL_INTERP); CeedChk(ierr); 923 924 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 925 CEED_QFUNCTION_NONE, op_restrict); 926 CeedChk(ierr); 927 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 928 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 929 CeedChk(ierr); 930 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 931 CEED_BASIS_COLLOCATED, mult_vec); 932 CeedChk(ierr); 933 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 934 CEED_VECTOR_ACTIVE); CeedChk(ierr); 935 936 // Prolongation 937 CeedQFunction qf_prolong; 938 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 939 CeedChk(ierr); 940 CeedInt *num_comp_p_data; 941 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 942 num_comp_p_data[0] = num_comp; 943 CeedQFunctionContext ctx_p; 944 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 945 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 946 sizeof(*num_comp_p_data), num_comp_p_data); 947 CeedChk(ierr); 948 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 949 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 950 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 951 CeedChk(ierr); 952 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 953 
CeedChk(ierr); 954 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 955 CeedChk(ierr); 956 957 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 958 CEED_QFUNCTION_NONE, op_prolong); 959 CeedChk(ierr); 960 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 961 CEED_VECTOR_ACTIVE); CeedChk(ierr); 962 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 963 CEED_BASIS_COLLOCATED, mult_vec); 964 CeedChk(ierr); 965 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 966 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 967 CeedChk(ierr); 968 969 // Cleanup 970 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 971 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 972 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 973 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 974 return CEED_ERROR_SUCCESS; 975 } 976 977 /** 978 @brief Build 1D mass matrix and Laplacian with perturbation 979 980 @param[in] interp_1d Interpolation matrix in one dimension 981 @param[in] grad_1d Gradient matrix in one dimension 982 @param[in] q_weight_1d Quadrature weights in one dimension 983 @param[in] P_1d Number of basis nodes in one dimension 984 @param[in] Q_1d Number of quadrature points in one dimension 985 @param[in] dim Dimension of basis 986 @param[out] mass Assembled mass matrix in one dimension 987 @param[out] laplace Assembled perturbed Laplacian in one dimension 988 989 @return An error code: 0 - success, otherwise - failure 990 991 @ref Developer 992 **/ 993 CeedPragmaOptimizeOff 994 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 995 const CeedScalar *grad_1d, 996 const CeedScalar *q_weight_1d, CeedInt P_1d, 997 CeedInt Q_1d, CeedInt dim, 998 CeedScalar *mass, CeedScalar *laplace) { 999 1000 for (CeedInt i=0; i<P_1d; i++) 1001 for (CeedInt j=0; j<P_1d; j++) { 1002 CeedScalar sum = 0.0; 1003 for (CeedInt k=0; k<Q_1d; k++) 1004 sum += 
interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 1005 mass[i+j*P_1d] = sum; 1006 } 1007 // -- Laplacian 1008 for (CeedInt i=0; i<P_1d; i++) 1009 for (CeedInt j=0; j<P_1d; j++) { 1010 CeedScalar sum = 0.0; 1011 for (CeedInt k=0; k<Q_1d; k++) 1012 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1013 laplace[i+j*P_1d] = sum; 1014 } 1015 CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4; 1016 for (CeedInt i=0; i<P_1d; i++) 1017 laplace[i+P_1d*i] += perturbation; 1018 return CEED_ERROR_SUCCESS; 1019 } 1020 CeedPragmaOptimizeOn 1021 1022 /// @} 1023 1024 /// ---------------------------------------------------------------------------- 1025 /// CeedOperator Backend API 1026 /// ---------------------------------------------------------------------------- 1027 /// @addtogroup CeedOperatorBackend 1028 /// @{ 1029 1030 /** 1031 @brief Create object holding CeedQFunction assembly data for CeedOperator 1032 1033 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1034 @param[out] data Address of the variable where the newly created 1035 CeedQFunctionAssemblyData will be stored 1036 1037 @return An error code: 0 - success, otherwise - failure 1038 1039 @ref Backend 1040 **/ 1041 int CeedQFunctionAssemblyDataCreate(Ceed ceed, 1042 CeedQFunctionAssemblyData *data) { 1043 int ierr; 1044 1045 ierr = CeedCalloc(1, data); CeedChk(ierr); 1046 (*data)->ref_count = 1; 1047 (*data)->ceed = ceed; 1048 ierr = CeedReference(ceed); CeedChk(ierr); 1049 1050 return CEED_ERROR_SUCCESS; 1051 } 1052 1053 /** 1054 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1055 1056 @param data CeedQFunctionAssemblyData to increment the reference counter 1057 1058 @return An error code: 0 - success, otherwise - failure 1059 1060 @ref Backend 1061 **/ 1062 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1063 data->ref_count++; 1064 return CEED_ERROR_SUCCESS; 1065 } 1066 1067 /** 1068 @brief Copy the pointer to a 
CeedQFunctionAssemblyData. Both pointers should 1069 be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`; 1070 Note: If `*data_copy` is non-NULL, then it is assumed that 1071 `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This 1072 CeedQFunctionAssemblyData will be destroyed if `*data_copy` is 1073 the only reference to this CeedQFunctionAssemblyData. 1074 1075 @param data CeedQFunctionAssemblyData to copy reference to 1076 @param[out] data_copy Variable to store copied reference 1077 1078 @return An error code: 0 - success, otherwise - failure 1079 1080 @ref Backend 1081 **/ 1082 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, 1083 CeedQFunctionAssemblyData *data_copy) { 1084 int ierr; 1085 1086 ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr); 1087 ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr); 1088 *data_copy = data; 1089 return CEED_ERROR_SUCCESS; 1090 } 1091 1092 /** 1093 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1094 1095 @param[in] data CeedQFunctionAssemblyData to retreive status 1096 @param[out] is_setup Boolean flag for setup status 1097 1098 @return An error code: 0 - success, otherwise - failure 1099 1100 @ref Backend 1101 **/ 1102 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, 1103 bool *is_setup) { 1104 *is_setup = data->is_setup; 1105 return CEED_ERROR_SUCCESS; 1106 } 1107 1108 /** 1109 @brief Set internal objects for CeedQFunctionAssemblyData 1110 1111 @param[in] data CeedQFunctionAssemblyData to set objects 1112 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1113 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1114 1115 @return An error code: 0 - success, otherwise - failure 1116 1117 @ref Backend 1118 **/ 1119 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, 1120 CeedVector vec, CeedElemRestriction rstr) { 1121 int ierr; 
1122 1123 ierr = CeedVectorDestroy(&data->vec); CeedChk(ierr); 1124 data->vec = vec; 1125 1126 ierr = CeedElemRestrictionDestroy(&data->rstr); CeedChk(ierr); 1127 data->rstr = rstr; 1128 1129 data->is_setup = true; 1130 return CEED_ERROR_SUCCESS; 1131 } 1132 1133 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, 1134 CeedVector *vec, CeedElemRestriction *rstr) { 1135 if (!data->is_setup) 1136 // LCOV_EXCL_START 1137 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, 1138 "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1139 // LCOV_EXCL_STOP 1140 1141 *vec = data->vec; 1142 *rstr = data->rstr; 1143 1144 return CEED_ERROR_SUCCESS; 1145 } 1146 1147 /** 1148 @brief Destroy CeedQFunctionAssemblyData 1149 1150 @param[out] data CeedQFunctionAssemblyData to destroy 1151 1152 @return An error code: 0 - success, otherwise - failure 1153 1154 @ref Backend 1155 **/ 1156 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1157 int ierr; 1158 1159 if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS; 1160 1161 ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr); 1162 ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr); 1163 ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr); 1164 1165 ierr = CeedFree(data); CeedChk(ierr); 1166 return CEED_ERROR_SUCCESS; 1167 } 1168 1169 /// @} 1170 1171 /// ---------------------------------------------------------------------------- 1172 /// CeedOperator Public API 1173 /// ---------------------------------------------------------------------------- 1174 /// @addtogroup CeedOperatorUser 1175 /// @{ 1176 1177 /** 1178 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1179 1180 This returns a CeedVector containing a matrix at each quadrature point 1181 providing the action of the CeedQFunction associated with the CeedOperator. 
1182 The vector 'assembled' is of shape 1183 [num_elements, num_input_fields, num_output_fields, num_quad_points] 1184 and contains column-major matrices representing the action of the 1185 CeedQFunction for a corresponding quadrature point on an element. Inputs and 1186 outputs are in the order provided by the user when adding CeedOperator fields. 1187 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1188 'v', provided in that order, would result in an assembled QFunction that 1189 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1190 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1191 1192 Note: Calling this function asserts that setup is complete 1193 and sets the CeedOperator as immutable. 1194 1195 @param op CeedOperator to assemble CeedQFunction 1196 @param[out] assembled CeedVector to store assembled CeedQFunction at 1197 quadrature points 1198 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1199 CeedQFunction 1200 @param request Address of CeedRequest for non-blocking completion, else 1201 @ref CEED_REQUEST_IMMEDIATE 1202 1203 @return An error code: 0 - success, otherwise - failure 1204 1205 @ref User 1206 **/ 1207 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1208 CeedElemRestriction *rstr, 1209 CeedRequest *request) { 1210 int ierr; 1211 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1212 1213 // Backend version 1214 if (op->LinearAssembleQFunction) { 1215 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1216 CeedChk(ierr); 1217 } else { 1218 // Fallback to reference Ceed 1219 if (!op->op_fallback) { 1220 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1221 } 1222 // Assemble 1223 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1224 rstr, request); CeedChk(ierr); 1225 } 1226 return CEED_ERROR_SUCCESS; 1227 } 1228 1229 /** 1230 @brief Assemble CeedQFunction and store 
result internall. Return copied 1231 references of stored data to the caller. Caller is responsible for 1232 ownership and destruction of the copied references. See also 1233 @ref CeedOperatorLinearAssembleQFunction 1234 1235 @param op CeedOperator to assemble CeedQFunction 1236 @param assembled CeedVector to store assembled CeedQFunction at 1237 quadrature points 1238 @param rstr CeedElemRestriction for CeedVector containing assembled 1239 CeedQFunction 1240 @param request Address of CeedRequest for non-blocking completion, else 1241 @ref CEED_REQUEST_IMMEDIATE 1242 1243 @return An error code: 0 - success, otherwise - failure 1244 1245 @ref User 1246 **/ 1247 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, 1248 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1249 int ierr; 1250 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1251 1252 // Backend version 1253 if (op->LinearAssembleQFunctionUpdate) { 1254 bool qf_assembled_is_setup; 1255 CeedVector assembled_vec; 1256 CeedElemRestriction assembled_rstr; 1257 1258 ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, 1259 &qf_assembled_is_setup); CeedChk(ierr); 1260 if (qf_assembled_is_setup) { 1261 ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, 1262 &assembled_rstr); CeedChk(ierr); 1263 ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, 1264 request); CeedChk(ierr); 1265 } else { 1266 ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, 1267 request); CeedChk(ierr); 1268 ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, 1269 assembled_rstr); CeedChk(ierr); 1270 } 1271 // Copy reference to internally held copy 1272 *assembled = NULL; 1273 *rstr = NULL; 1274 ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr); 1275 ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr); 1276 } else { 1277 // Fallback to reference Ceed 1278 if 
(!op->op_fallback) { 1279 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1280 } 1281 // Assemble 1282 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback, 1283 assembled, rstr, request); CeedChk(ierr); 1284 } 1285 1286 return CEED_ERROR_SUCCESS; 1287 } 1288 1289 /** 1290 @brief Assemble the diagonal of a square linear CeedOperator 1291 1292 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1293 1294 Note: Currently only non-composite CeedOperators with a single field and 1295 composite CeedOperators with single field sub-operators are supported. 1296 1297 Note: Calling this function asserts that setup is complete 1298 and sets the CeedOperator as immutable. 1299 1300 @param op CeedOperator to assemble CeedQFunction 1301 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1302 @param request Address of CeedRequest for non-blocking completion, else 1303 @ref CEED_REQUEST_IMMEDIATE 1304 1305 @return An error code: 0 - success, otherwise - failure 1306 1307 @ref User 1308 **/ 1309 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, 1310 CeedRequest *request) { 1311 int ierr; 1312 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1313 1314 // Use backend version, if available 1315 if (op->LinearAssembleDiagonal) { 1316 ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); 1317 return CEED_ERROR_SUCCESS; 1318 } else if (op->LinearAssembleAddDiagonal) { 1319 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1320 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1321 return CEED_ERROR_SUCCESS; 1322 } else { 1323 // Check for valid fallback resource 1324 const char *resource, *fallback_resource; 1325 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1326 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1327 CeedChk(ierr); 1328 if (strcmp(fallback_resource, "") && strcmp(resource, 
fallback_resource)) { 1329 // Fallback to reference Ceed 1330 if (!op->op_fallback) { 1331 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1332 } 1333 // Assemble 1334 ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request); 1335 CeedChk(ierr); 1336 return CEED_ERROR_SUCCESS; 1337 } 1338 } 1339 1340 // Default interface implementation 1341 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1342 ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); 1343 CeedChk(ierr); 1344 return CEED_ERROR_SUCCESS; 1345 } 1346 1347 /** 1348 @brief Assemble the diagonal of a square linear CeedOperator 1349 1350 This sums into a CeedVector the diagonal of a linear CeedOperator. 1351 1352 Note: Currently only non-composite CeedOperators with a single field and 1353 composite CeedOperators with single field sub-operators are supported. 1354 1355 Note: Calling this function asserts that setup is complete 1356 and sets the CeedOperator as immutable. 1357 1358 @param op CeedOperator to assemble CeedQFunction 1359 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1360 @param request Address of CeedRequest for non-blocking completion, else 1361 @ref CEED_REQUEST_IMMEDIATE 1362 1363 @return An error code: 0 - success, otherwise - failure 1364 1365 @ref User 1366 **/ 1367 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1368 CeedRequest *request) { 1369 int ierr; 1370 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1371 1372 // Use backend version, if available 1373 if (op->LinearAssembleAddDiagonal) { 1374 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1375 return CEED_ERROR_SUCCESS; 1376 } else { 1377 // Check for valid fallback resource 1378 const char *resource, *fallback_resource; 1379 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1380 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1381 CeedChk(ierr); 1382 if 
(strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1383 // Fallback to reference Ceed 1384 if (!op->op_fallback) { 1385 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1386 } 1387 // Assemble 1388 ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, 1389 request); CeedChk(ierr); 1390 return CEED_ERROR_SUCCESS; 1391 } 1392 } 1393 1394 // Default interface implementation 1395 bool is_composite; 1396 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1397 if (is_composite) { 1398 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1399 false, assembled); CeedChk(ierr); 1400 return CEED_ERROR_SUCCESS; 1401 } else { 1402 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1403 CeedChk(ierr); 1404 return CEED_ERROR_SUCCESS; 1405 } 1406 } 1407 1408 /** 1409 @brief Assemble the point block diagonal of a square linear CeedOperator 1410 1411 This overwrites a CeedVector with the point block diagonal of a linear 1412 CeedOperator. 1413 1414 Note: Currently only non-composite CeedOperators with a single field and 1415 composite CeedOperators with single field sub-operators are supported. 1416 1417 Note: Calling this function asserts that setup is complete 1418 and sets the CeedOperator as immutable. 1419 1420 @param op CeedOperator to assemble CeedQFunction 1421 @param[out] assembled CeedVector to store assembled CeedOperator point block 1422 diagonal, provided in row-major form with an 1423 @a num_comp * @a num_comp block at each node. The dimensions 1424 of this vector are derived from the active vector 1425 for the CeedOperator. The array has shape 1426 [nodes, component out, component in]. 
1427 @param request Address of CeedRequest for non-blocking completion, else 1428 CEED_REQUEST_IMMEDIATE 1429 1430 @return An error code: 0 - success, otherwise - failure 1431 1432 @ref User 1433 **/ 1434 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, 1435 CeedVector assembled, CeedRequest *request) { 1436 int ierr; 1437 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1438 1439 // Use backend version, if available 1440 if (op->LinearAssemblePointBlockDiagonal) { 1441 ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); 1442 CeedChk(ierr); 1443 return CEED_ERROR_SUCCESS; 1444 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1445 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1446 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, 1447 request); CeedChk(ierr); 1448 return CEED_ERROR_SUCCESS; 1449 } else { 1450 // Check for valid fallback resource 1451 const char *resource, *fallback_resource; 1452 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1453 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1454 CeedChk(ierr); 1455 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1456 // Fallback to reference Ceed 1457 if (!op->op_fallback) { 1458 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1459 } 1460 // Assemble 1461 ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback, 1462 assembled, request); CeedChk(ierr); 1463 return CEED_ERROR_SUCCESS; 1464 } 1465 } 1466 1467 // Default interface implementation 1468 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1469 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); 1470 CeedChk(ierr); 1471 return CEED_ERROR_SUCCESS; 1472 } 1473 1474 /** 1475 @brief Assemble the point block diagonal of a square linear CeedOperator 1476 1477 This sums into a CeedVector with the point block diagonal of a linear 1478 CeedOperator. 
1479 1480 Note: Currently only non-composite CeedOperators with a single field and 1481 composite CeedOperators with single field sub-operators are supported. 1482 1483 Note: Calling this function asserts that setup is complete 1484 and sets the CeedOperator as immutable. 1485 1486 @param op CeedOperator to assemble CeedQFunction 1487 @param[out] assembled CeedVector to store assembled CeedOperator point block 1488 diagonal, provided in row-major form with an 1489 @a num_comp * @a num_comp block at each node. The dimensions 1490 of this vector are derived from the active vector 1491 for the CeedOperator. The array has shape 1492 [nodes, component out, component in]. 1493 @param request Address of CeedRequest for non-blocking completion, else 1494 CEED_REQUEST_IMMEDIATE 1495 1496 @return An error code: 0 - success, otherwise - failure 1497 1498 @ref User 1499 **/ 1500 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, 1501 CeedVector assembled, CeedRequest *request) { 1502 int ierr; 1503 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1504 1505 // Use backend version, if available 1506 if (op->LinearAssembleAddPointBlockDiagonal) { 1507 ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); 1508 CeedChk(ierr); 1509 return CEED_ERROR_SUCCESS; 1510 } else { 1511 // Check for valid fallback resource 1512 const char *resource, *fallback_resource; 1513 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1514 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1515 CeedChk(ierr); 1516 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1517 // Fallback to reference Ceed 1518 if (!op->op_fallback) { 1519 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1520 } 1521 // Assemble 1522 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1523 assembled, request); CeedChk(ierr); 1524 return CEED_ERROR_SUCCESS; 1525 } 1526 } 1527 1528 // Default interface implemenation 
1529 bool is_composite; 1530 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1531 if (is_composite) { 1532 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1533 true, assembled); CeedChk(ierr); 1534 return CEED_ERROR_SUCCESS; 1535 } else { 1536 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1537 CeedChk(ierr); 1538 return CEED_ERROR_SUCCESS; 1539 } 1540 } 1541 1542 /** 1543 @brief Fully assemble the nonzero pattern of a linear operator. 1544 1545 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1546 1547 The assembly routines use coordinate format, with num_entries tuples of the 1548 form (i, j, value) which indicate that value should be added to the matrix 1549 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1550 This function returns the number of entries and their (i, j) locations, 1551 while CeedOperatorLinearAssemble() provides the values in the same 1552 ordering. 1553 1554 This will generally be slow unless your operator is low-order. 1555 1556 Note: Calling this function asserts that setup is complete 1557 and sets the CeedOperator as immutable. 
1558 1559 @param[in] op CeedOperator to assemble 1560 @param[out] num_entries Number of entries in coordinate nonzero pattern 1561 @param[out] rows Row number for each entry 1562 @param[out] cols Column number for each entry 1563 1564 @ref User 1565 **/ 1566 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, 1567 CeedInt **rows, CeedInt **cols) { 1568 int ierr; 1569 CeedInt num_suboperators, single_entries; 1570 CeedOperator *sub_operators; 1571 bool is_composite; 1572 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1573 1574 // Use backend version, if available 1575 if (op->LinearAssembleSymbolic) { 1576 ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr); 1577 return CEED_ERROR_SUCCESS; 1578 } else { 1579 // Check for valid fallback resource 1580 const char *resource, *fallback_resource; 1581 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1582 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1583 CeedChk(ierr); 1584 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1585 // Fallback to reference Ceed 1586 if (!op->op_fallback) { 1587 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1588 } 1589 // Assemble 1590 ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows, 1591 cols); CeedChk(ierr); 1592 return CEED_ERROR_SUCCESS; 1593 } 1594 } 1595 1596 // Default interface implementation 1597 1598 // count entries and allocate rows, cols arrays 1599 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1600 *num_entries = 0; 1601 if (is_composite) { 1602 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1603 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1604 for (int k = 0; k < num_suboperators; ++k) { 1605 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1606 &single_entries); CeedChk(ierr); 1607 *num_entries += single_entries; 1608 } 1609 } else { 1610 ierr = 
CeedSingleOperatorAssemblyCountEntries(op, 1611 &single_entries); CeedChk(ierr); 1612 *num_entries += single_entries; 1613 } 1614 ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr); 1615 ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr); 1616 1617 // assemble nonzero locations 1618 CeedInt offset = 0; 1619 if (is_composite) { 1620 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1621 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1622 for (int k = 0; k < num_suboperators; ++k) { 1623 ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, 1624 *cols); CeedChk(ierr); 1625 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1626 &single_entries); 1627 CeedChk(ierr); 1628 offset += single_entries; 1629 } 1630 } else { 1631 ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols); 1632 CeedChk(ierr); 1633 } 1634 1635 return CEED_ERROR_SUCCESS; 1636 } 1637 1638 /** 1639 @brief Fully assemble the nonzero entries of a linear operator. 1640 1641 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic() 1642 1643 The assembly routines use coordinate format, with num_entries tuples of the 1644 form (i, j, value) which indicate that value should be added to the matrix 1645 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1646 This function returns the values of the nonzero entries to be added, their 1647 (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1648 1649 This will generally be slow unless your operator is low-order. 1650 1651 Note: Calling this function asserts that setup is complete 1652 and sets the CeedOperator as immutable. 
1653 1654 @param[in] op CeedOperator to assemble 1655 @param[out] values Values to assemble into matrix 1656 1657 @ref User 1658 **/ 1659 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1660 int ierr; 1661 CeedInt num_suboperators, single_entries = 0; 1662 CeedOperator *sub_operators; 1663 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1664 1665 // Use backend version, if available 1666 if (op->LinearAssemble) { 1667 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1668 return CEED_ERROR_SUCCESS; 1669 } else { 1670 // Check for valid fallback resource 1671 const char *resource, *fallback_resource; 1672 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1673 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1674 CeedChk(ierr); 1675 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1676 // Fallback to reference Ceed 1677 if (!op->op_fallback) { 1678 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1679 } 1680 // Assemble 1681 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1682 return CEED_ERROR_SUCCESS; 1683 } 1684 } 1685 1686 // Default interface implementation 1687 bool is_composite; 1688 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1689 1690 CeedInt offset = 0; 1691 if (is_composite) { 1692 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1693 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1694 for (int k = 0; k < num_suboperators; ++k) { 1695 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1696 CeedChk(ierr); 1697 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1698 &single_entries); 1699 CeedChk(ierr); 1700 offset += single_entries; 1701 } 1702 } else { 1703 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1704 } 1705 1706 return CEED_ERROR_SUCCESS; 1707 } 1708 1709 /** 1710 @brief Create a multigrid coarse operator and level 
transfer operators 1711 for a CeedOperator, creating the prolongation basis from the 1712 fine and coarse grid interpolation 1713 1714 Note: Calling this function asserts that setup is complete 1715 and sets the CeedOperator as immutable. 1716 1717 @param[in] op_fine Fine grid operator 1718 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1719 @param[in] rstr_coarse Coarse grid restriction 1720 @param[in] basis_coarse Coarse grid active vector basis 1721 @param[out] op_coarse Coarse grid operator 1722 @param[out] op_prolong Coarse to fine operator 1723 @param[out] op_restrict Fine to coarse operator 1724 1725 @return An error code: 0 - success, otherwise - failure 1726 1727 @ref User 1728 **/ 1729 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1730 CeedVector p_mult_fine, 1731 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1732 CeedOperator *op_coarse, CeedOperator *op_prolong, 1733 CeedOperator *op_restrict) { 1734 int ierr; 1735 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1736 Ceed ceed; 1737 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1738 1739 // Check for compatible quadrature spaces 1740 CeedBasis basis_fine; 1741 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1742 CeedInt Q_f, Q_c; 1743 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1744 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1745 if (Q_f != Q_c) 1746 // LCOV_EXCL_START 1747 return CeedError(ceed, CEED_ERROR_DIMENSION, 1748 "Bases must have compatible quadrature spaces"); 1749 // LCOV_EXCL_STOP 1750 1751 // Coarse to fine basis 1752 CeedInt P_f, P_c, Q = Q_f; 1753 bool is_tensor_f, is_tensor_c; 1754 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1755 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1756 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1757 if (is_tensor_f && is_tensor_c) { 1758 ierr = 
CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1759 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1760 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1761 } else if (!is_tensor_f && !is_tensor_c) { 1762 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1763 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1764 } else { 1765 // LCOV_EXCL_START 1766 return CeedError(ceed, CEED_ERROR_MINOR, 1767 "Bases must both be tensor or non-tensor"); 1768 // LCOV_EXCL_STOP 1769 } 1770 1771 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1772 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1773 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1774 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1775 if (is_tensor_f) { 1776 memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]); 1777 memcpy(interp_c, basis_coarse->interp_1d, 1778 Q*P_c*sizeof basis_coarse->interp_1d[0]); 1779 } else { 1780 memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]); 1781 memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]); 1782 } 1783 1784 // -- QR Factorization, interp_f = Q R 1785 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1786 1787 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1788 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1789 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1790 1791 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1792 for (CeedInt j=0; j<P_c; j++) { // Column j 1793 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1794 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1795 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1796 for (CeedInt k=i+1; k<P_f; k++) 1797 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1798 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1799 } 1800 } 1801 ierr = CeedFree(&tau); CeedChk(ierr); 1802 ierr = 
CeedFree(&interp_c); CeedChk(ierr); 1803 ierr = CeedFree(&interp_f); CeedChk(ierr); 1804 1805 // Complete with interp_c_to_f versions of code 1806 if (is_tensor_f) { 1807 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1808 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1809 CeedChk(ierr); 1810 } else { 1811 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1812 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1813 CeedChk(ierr); 1814 } 1815 1816 // Cleanup 1817 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1818 return CEED_ERROR_SUCCESS; 1819 } 1820 1821 /** 1822 @brief Create a multigrid coarse operator and level transfer operators 1823 for a CeedOperator with a tensor basis for the active basis 1824 1825 Note: Calling this function asserts that setup is complete 1826 and sets the CeedOperator as immutable. 1827 1828 @param[in] op_fine Fine grid operator 1829 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1830 @param[in] rstr_coarse Coarse grid restriction 1831 @param[in] basis_coarse Coarse grid active vector basis 1832 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1833 @param[out] op_coarse Coarse grid operator 1834 @param[out] op_prolong Coarse to fine operator 1835 @param[out] op_restrict Fine to coarse operator 1836 1837 @return An error code: 0 - success, otherwise - failure 1838 1839 @ref User 1840 **/ 1841 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1842 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1843 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1844 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1845 int ierr; 1846 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1847 Ceed ceed; 1848 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1849 1850 // Check for compatible quadrature spaces 1851 CeedBasis basis_fine; 1852 
ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1853 CeedInt Q_f, Q_c; 1854 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1855 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1856 if (Q_f != Q_c) 1857 // LCOV_EXCL_START 1858 return CeedError(ceed, CEED_ERROR_DIMENSION, 1859 "Bases must have compatible quadrature spaces"); 1860 // LCOV_EXCL_STOP 1861 1862 // Coarse to fine basis 1863 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1864 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1865 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1866 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1867 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1868 CeedChk(ierr); 1869 P_1d_c = dim == 1 ? num_nodes_c : 1870 dim == 2 ? sqrt(num_nodes_c) : 1871 cbrt(num_nodes_c); 1872 CeedScalar *q_ref, *q_weight, *grad; 1873 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1874 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1875 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1876 CeedBasis basis_c_to_f; 1877 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1878 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1879 CeedChk(ierr); 1880 ierr = CeedFree(&q_ref); CeedChk(ierr); 1881 ierr = CeedFree(&q_weight); CeedChk(ierr); 1882 ierr = CeedFree(&grad); CeedChk(ierr); 1883 1884 // Core code 1885 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1886 basis_coarse, basis_c_to_f, op_coarse, 1887 op_prolong, op_restrict); 1888 CeedChk(ierr); 1889 return CEED_ERROR_SUCCESS; 1890 } 1891 1892 /** 1893 @brief Create a multigrid coarse operator and level transfer operators 1894 for a CeedOperator with a non-tensor basis for the active vector 1895 1896 Note: Calling this function asserts that setup is complete 1897 and sets the CeedOperator as immutable. 
1898 1899 @param[in] op_fine Fine grid operator 1900 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1901 @param[in] rstr_coarse Coarse grid restriction 1902 @param[in] basis_coarse Coarse grid active vector basis 1903 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1904 @param[out] op_coarse Coarse grid operator 1905 @param[out] op_prolong Coarse to fine operator 1906 @param[out] op_restrict Fine to coarse operator 1907 1908 @return An error code: 0 - success, otherwise - failure 1909 1910 @ref User 1911 **/ 1912 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1913 CeedVector p_mult_fine, 1914 CeedElemRestriction rstr_coarse, 1915 CeedBasis basis_coarse, 1916 const CeedScalar *interp_c_to_f, 1917 CeedOperator *op_coarse, 1918 CeedOperator *op_prolong, 1919 CeedOperator *op_restrict) { 1920 int ierr; 1921 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1922 Ceed ceed; 1923 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1924 1925 // Check for compatible quadrature spaces 1926 CeedBasis basis_fine; 1927 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1928 CeedInt Q_f, Q_c; 1929 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1930 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1931 if (Q_f != Q_c) 1932 // LCOV_EXCL_START 1933 return CeedError(ceed, CEED_ERROR_DIMENSION, 1934 "Bases must have compatible quadrature spaces"); 1935 // LCOV_EXCL_STOP 1936 1937 // Coarse to fine basis 1938 CeedElemTopology topo; 1939 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1940 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1941 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1942 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1943 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 1944 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1945 
CeedChk(ierr); 1946 CeedScalar *q_ref, *q_weight, *grad; 1947 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 1948 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 1949 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 1950 CeedBasis basis_c_to_f; 1951 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 1952 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1953 CeedChk(ierr); 1954 ierr = CeedFree(&q_ref); CeedChk(ierr); 1955 ierr = CeedFree(&q_weight); CeedChk(ierr); 1956 ierr = CeedFree(&grad); CeedChk(ierr); 1957 1958 // Core code 1959 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1960 basis_coarse, basis_c_to_f, op_coarse, 1961 op_prolong, op_restrict); 1962 CeedChk(ierr); 1963 return CEED_ERROR_SUCCESS; 1964 } 1965 1966 /** 1967 @brief Build a FDM based approximate inverse for each element for a 1968 CeedOperator 1969 1970 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 1971 Method based approximate inverse. This function obtains the simultaneous 1972 diagonalization for the 1D mass and Laplacian operators, 1973 M = V^T V, K = V^T S V. 1974 The assembled QFunction is used to modify the eigenvalues from simultaneous 1975 diagonalization and obtain an approximate inverse of the form 1976 V^T S^hat V. The CeedOperator must be linear and non-composite. The 1977 associated CeedQFunction must therefore also be linear. 1978 1979 Note: Calling this function asserts that setup is complete 1980 and sets the CeedOperator as immutable. 
1981 1982 @param op CeedOperator to create element inverses 1983 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 1984 for each element 1985 @param request Address of CeedRequest for non-blocking completion, else 1986 @ref CEED_REQUEST_IMMEDIATE 1987 1988 @return An error code: 0 - success, otherwise - failure 1989 1990 @ref User 1991 **/ 1992 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 1993 CeedRequest *request) { 1994 int ierr; 1995 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1996 1997 // Use backend version, if available 1998 if (op->CreateFDMElementInverse) { 1999 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2000 return CEED_ERROR_SUCCESS; 2001 } else { 2002 // Check for valid fallback resource 2003 const char *resource, *fallback_resource; 2004 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2005 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2006 CeedChk(ierr); 2007 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2008 // Fallback to reference Ceed 2009 if (!op->op_fallback) { 2010 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2011 } 2012 // Assemble 2013 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2014 CeedChk(ierr); 2015 return CEED_ERROR_SUCCESS; 2016 } 2017 } 2018 2019 // Interface implementation 2020 Ceed ceed, ceed_parent; 2021 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2022 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2023 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2024 CeedQFunction qf; 2025 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2026 2027 // Determine active input basis 2028 bool interp = false, grad = false; 2029 CeedBasis basis = NULL; 2030 CeedElemRestriction rstr = NULL; 2031 CeedOperatorField *op_fields; 2032 CeedQFunctionField *qf_fields; 2033 CeedInt num_input_fields; 2034 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2035 CeedChk(ierr); 2036 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2037 for (CeedInt i=0; i<num_input_fields; i++) { 2038 CeedVector vec; 2039 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2040 if (vec == CEED_VECTOR_ACTIVE) { 2041 CeedEvalMode eval_mode; 2042 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2043 interp = interp || eval_mode == CEED_EVAL_INTERP; 2044 grad = grad || eval_mode == CEED_EVAL_GRAD; 2045 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2046 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2047 } 2048 } 2049 if (!basis) 2050 // LCOV_EXCL_START 2051 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2052 // LCOV_EXCL_STOP 2053 CeedSize l_size = 1; 2054 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2055 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2056 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2057 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2058 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2059 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2060 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2061 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2062 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2063 2064 // Build and diagonalize 1D Mass and Laplacian 2065 
bool tensor_basis; 2066 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2067 if (!tensor_basis) 2068 // LCOV_EXCL_START 2069 return CeedError(ceed, CEED_ERROR_BACKEND, 2070 "FDMElementInverse only supported for tensor " 2071 "bases"); 2072 // LCOV_EXCL_STOP 2073 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2074 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2075 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2076 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2077 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2078 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2079 // -- Build matrices 2080 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2081 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2082 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2083 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2084 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2085 mass, laplace); CeedChk(ierr); 2086 2087 // -- Diagonalize 2088 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2089 CeedChk(ierr); 2090 ierr = CeedFree(&mass); CeedChk(ierr); 2091 ierr = CeedFree(&laplace); CeedChk(ierr); 2092 for (CeedInt i=0; i<P_1d; i++) 2093 for (CeedInt j=0; j<P_1d; j++) 2094 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2095 ierr = CeedFree(&x); CeedChk(ierr); 2096 2097 // Assemble QFunction 2098 CeedVector assembled; 2099 CeedElemRestriction rstr_qf; 2100 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2101 &rstr_qf, request); CeedChk(ierr); 2102 CeedInt layout[3]; 2103 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2104 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2105 CeedScalar max_norm = 0; 2106 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2107 2108 // Calculate element averages 2109 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2110 CeedScalar 
*elem_avg; 2111 const CeedScalar *assembled_array, *q_weight_array; 2112 CeedVector q_weight; 2113 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2114 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2115 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2116 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2117 CeedChk(ierr); 2118 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2119 CeedChk(ierr); 2120 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2121 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2122 for (CeedInt e=0; e<num_elem; e++) { 2123 CeedInt count = 0; 2124 for (CeedInt q=0; q<num_qpts; q++) 2125 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2126 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2127 qf_value_bound) { 2128 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2129 q_weight_array[q]; 2130 count++; 2131 } 2132 if (count) { 2133 elem_avg[e] /= count; 2134 } else { 2135 elem_avg[e] = 1.0; 2136 } 2137 } 2138 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2139 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2140 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2141 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2142 2143 // Build FDM diagonal 2144 CeedVector q_data; 2145 CeedScalar *q_data_array, *fdm_diagonal; 2146 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2147 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2148 for (CeedInt c=0; c<num_comp; c++) 2149 for (CeedInt n=0; n<elem_size; n++) { 2150 if (interp) 2151 fdm_diagonal[c*elem_size + n] = 1.0; 2152 if (grad) 2153 for (CeedInt d=0; d<dim; d++) { 2154 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2155 fdm_diagonal[c*elem_size + n] += lambda[i]; 2156 } 2157 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2158 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2159 } 2160 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2161 CeedChk(ierr); 2162 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2163 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2164 CeedChk(ierr); 2165 for (CeedInt e=0; e<num_elem; e++) 2166 for (CeedInt c=0; c<num_comp; c++) 2167 for (CeedInt n=0; n<elem_size; n++) 2168 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2169 fdm_diagonal[c*elem_size + n]); 2170 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2171 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2172 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2173 2174 // Setup FDM operator 2175 // -- Basis 2176 CeedBasis fdm_basis; 2177 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2178 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2179 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2180 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2181 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2182 fdm_interp, grad_dummy, q_ref_dummy, 2183 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2184 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2185 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2186 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2187 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2188 ierr = CeedFree(&lambda); CeedChk(ierr); 2189 2190 // -- Restriction 2191 CeedElemRestriction rstr_qd_i; 2192 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2193 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2194 num_comp, num_elem*num_comp*elem_size, 2195 strides, &rstr_qd_i); CeedChk(ierr); 2196 // -- QFunction 2197 CeedQFunction qf_fdm; 2198 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2199 CeedChk(ierr); 2200 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2201 
CeedChk(ierr); 2202 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2203 CeedChk(ierr); 2204 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2205 CeedChk(ierr); 2206 // -- QFunction context 2207 CeedInt *num_comp_data; 2208 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2209 num_comp_data[0] = num_comp; 2210 CeedQFunctionContext ctx_fdm; 2211 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2212 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2213 sizeof(*num_comp_data), num_comp_data); 2214 CeedChk(ierr); 2215 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2216 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2217 // -- Operator 2218 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2219 CeedChk(ierr); 2220 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2221 CeedChk(ierr); 2222 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2223 q_data); CeedChk(ierr); 2224 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2225 CeedChk(ierr); 2226 2227 // Cleanup 2228 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2229 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2230 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2231 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2232 2233 return CEED_ERROR_SUCCESS; 2234 } 2235 2236 /// @} 2237