1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 op->op_fallback = op_ref; 
79 80 // Clone QF 81 CeedQFunction qf_ref; 82 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 83 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 84 qf_ref->data = NULL; 85 qf_ref->ceed = ceed_ref; 86 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 87 op_ref->qf = qf_ref; 88 op->qf_fallback = qf_ref; 89 return CEED_ERROR_SUCCESS; 90 } 91 92 /** 93 @brief Select correct basis matrix pointer based on CeedEvalMode 94 95 @param[in] eval_mode Current basis evaluation mode 96 @param[in] identity Pointer to identity matrix 97 @param[in] interp Pointer to interpolation matrix 98 @param[in] grad Pointer to gradient matrix 99 @param[out] basis_ptr Basis pointer to set 100 101 @return none 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 
  CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
          *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // Scale each nodal offset so it addresses the start of a num_comp x num_comp
  // block; interlaced (comp_stride == 1) data only needs a factor of num_comp
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    // Track largest offset to size the L-vector below
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction; each "node" now carries a full num_comp^2 block
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
  CeedChk(ierr);

  // Cleanup (offsets array was only borrowed; pointblock_offsets is now owned
  // by the new restriction via CEED_OWN_POINTER)
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  @param[in]  op             CeedOperator to assemble point block diagonal
  @param[in]  request        Address of CeedRequest for non-blocking completion, else
                               CEED_REQUEST_IMMEDIATE
  @param[in]  is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled      CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction at quadrature points (the pointwise D in B^T D B)
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunction(op, &assembled_qf, &rstr, request);
  CeedChk(ierr);
  // layout[] gives strides for (qpoint, field-pair, element) indexing of
  // assembled_qf - used below to address individual D values
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max-norm of assembled D, used for a relative drop tolerance below
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis: record one eval mode entry per D row/column
  // (CEED_EVAL_GRAD contributes dim entries)
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share a restriction for this assembly strategy
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // Gradient contributes one entry per spatial dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis (same bookkeeping as for inputs)
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector (element-local; scattered to L-vector at the end)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array, *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices; an explicit identity stands in for CEED_EVAL_NONE so the
  // same B^T D B loop handles all eval modes uniformly
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // QF values within 100*eps of zero (relative to the max entry) are skipped
  // as numerical noise
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // d_out selects which dim-slice of the grad matrix this entry uses
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal: accumulate full num_comp x num_comp block
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only: take only the c_in == c_out entries of D
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                  num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array); CeedChk(ierr);

  // Assemble local operator diagonal: scatter element diagonals, summing
  // shared-node contributions (CEED_TRANSPOSE adds into `assembled`)
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling composite operator diagonal

  Sub-operator diagonals are summed into `assembled`, which the caller is
  expected to have initialized.

  @param[in]  op             CeedOperator to assemble point block diagonal
  @param[in]  request        Address of CeedRequest for non-blocking completion, else
                               CEED_REQUEST_IMMEDIATE
  @param[in]  is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled      CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
  CeedOperator op, CeedRequest *request, const bool is_pointblock,
  CeedVector assembled) {
  int ierr;
  CeedInt num_sub;
  CeedOperator *suboperators;
  ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
  ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
  for (CeedInt i = 0; i < num_sub; i++) {
    ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
           is_pointblock, assembled); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build nonzero pattern for non-composite operator

  Users should generally use CeedOperatorLinearAssembleSymbolic()

  @param[in]  op      CeedOperator to assemble nonzero pattern
  @param[in]  offset  Offset for number of entries
  @param[out] rows    Row number for each entry
  @param[out] cols    Column number for each entry
445 446 @return An error code: 0 - success, otherwise - failure 447 448 @ref Developer 449 **/ 450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 451 CeedInt *rows, CeedInt *cols) { 452 int ierr; 453 Ceed ceed = op->ceed; 454 if (op->is_composite) 455 // LCOV_EXCL_START 456 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 457 "Composite operator not supported"); 458 // LCOV_EXCL_STOP 459 460 CeedElemRestriction rstr_in; 461 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 462 CeedInt num_elem, elem_size, num_nodes, num_comp; 463 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 464 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 465 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 466 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 467 CeedInt layout_er[3]; 468 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 469 470 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 471 472 // Determine elem_dof relation 473 CeedVector index_vec; 474 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 475 CeedScalar *array; 476 ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 477 for (CeedInt i = 0; i < num_nodes; ++i) { 478 array[i] = i; 479 } 480 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 481 CeedVector elem_dof; 482 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 483 CeedChk(ierr); 484 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 485 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 486 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 487 const CeedScalar *elem_dof_a; 488 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 489 CeedChk(ierr); 490 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 491 492 // Determine i, j 
locations for element matrices 493 CeedInt count = 0; 494 for (int e = 0; e < num_elem; ++e) { 495 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 496 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 497 for (int i = 0; i < elem_size; ++i) { 498 for (int j = 0; j < elem_size; ++j) { 499 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 500 (comp_out)*layout_er[1] + e*layout_er[2]; 501 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 502 (comp_in)*layout_er[1] + e*layout_er[2]; 503 504 const CeedInt row = elem_dof_a[elem_dof_index_row]; 505 const CeedInt col = elem_dof_a[elem_dof_index_col]; 506 507 rows[offset + count] = row; 508 cols[offset + count] = col; 509 count++; 510 } 511 } 512 } 513 } 514 } 515 if (count != local_num_entries) 516 // LCOV_EXCL_START 517 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 // LCOV_EXCL_STOP 519 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 520 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 521 522 return CEED_ERROR_SUCCESS; 523 } 524 525 /** 526 @brief Assemble nonzero entries for non-composite operator 527 528 Users should generally use CeedOperatorLinearAssemble() 529 530 @param[in] op CeedOperator to assemble 531 @param[out] offset Offest for number of entries 532 @param[out] values Values to assemble into matrix 533 534 @return An error code: 0 - success, otherwise - failure 535 536 @ref Developer 537 **/ 538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 539 CeedVector values) { 540 int ierr; 541 Ceed ceed = op->ceed; 542 if (op->is_composite) 543 // LCOV_EXCL_START 544 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 545 "Composite operator not supported"); 546 // LCOV_EXCL_STOP 547 548 // Assemble QFunction 549 CeedQFunction qf; 550 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 551 CeedVector assembled_qf; 552 CeedElemRestriction rstr_q; 553 ierr = CeedOperatorLinearAssembleQFunction( 554 op, 
&assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 555 556 CeedInt qf_length; 557 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 558 559 CeedInt num_input_fields, num_output_fields; 560 CeedOperatorField *input_fields; 561 CeedOperatorField *output_fields; 562 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 563 &num_output_fields, &output_fields); CeedChk(ierr); 564 565 // Determine active input basis 566 CeedQFunctionField *qf_fields; 567 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 568 CeedInt num_eval_mode_in = 0, dim = 1; 569 CeedEvalMode *eval_mode_in = NULL; 570 CeedBasis basis_in = NULL; 571 CeedElemRestriction rstr_in = NULL; 572 for (CeedInt i=0; i<num_input_fields; i++) { 573 CeedVector vec; 574 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 575 if (vec == CEED_VECTOR_ACTIVE) { 576 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 577 CeedChk(ierr); 578 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 579 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 580 CeedChk(ierr); 581 CeedEvalMode eval_mode; 582 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 583 CeedChk(ierr); 584 switch (eval_mode) { 585 case CEED_EVAL_NONE: 586 case CEED_EVAL_INTERP: 587 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 588 eval_mode_in[num_eval_mode_in] = eval_mode; 589 num_eval_mode_in += 1; 590 break; 591 case CEED_EVAL_GRAD: 592 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 593 for (CeedInt d=0; d<dim; d++) { 594 eval_mode_in[num_eval_mode_in+d] = eval_mode; 595 } 596 num_eval_mode_in += dim; 597 break; 598 case CEED_EVAL_WEIGHT: 599 case CEED_EVAL_DIV: 600 case CEED_EVAL_CURL: 601 break; // Caught by QF Assembly 602 } 603 } 604 } 605 606 // Determine active output basis 607 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 
CeedChk(ierr); 608 CeedInt num_eval_mode_out = 0; 609 CeedEvalMode *eval_mode_out = NULL; 610 CeedBasis basis_out = NULL; 611 CeedElemRestriction rstr_out = NULL; 612 for (CeedInt i=0; i<num_output_fields; i++) { 613 CeedVector vec; 614 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 615 if (vec == CEED_VECTOR_ACTIVE) { 616 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 617 CeedChk(ierr); 618 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 619 CeedChk(ierr); 620 CeedEvalMode eval_mode; 621 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 622 CeedChk(ierr); 623 switch (eval_mode) { 624 case CEED_EVAL_NONE: 625 case CEED_EVAL_INTERP: 626 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 627 eval_mode_out[num_eval_mode_out] = eval_mode; 628 num_eval_mode_out += 1; 629 break; 630 case CEED_EVAL_GRAD: 631 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 632 for (CeedInt d=0; d<dim; d++) { 633 eval_mode_out[num_eval_mode_out+d] = eval_mode; 634 } 635 num_eval_mode_out += dim; 636 break; 637 case CEED_EVAL_WEIGHT: 638 case CEED_EVAL_DIV: 639 case CEED_EVAL_CURL: 640 break; // Caught by QF Assembly 641 } 642 } 643 } 644 645 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 646 // LCOV_EXCL_START 647 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 648 "Cannot assemble operator with out inputs/outputs"); 649 // LCOV_EXCL_STOP 650 651 CeedInt num_elem, elem_size, num_qpts, num_comp; 652 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 653 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 654 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 655 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 656 657 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 658 659 // loop over elements and put in data 
structure 660 const CeedScalar *interp_in, *grad_in; 661 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 662 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 663 664 const CeedScalar *assembled_qf_array; 665 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 666 CeedChk(ierr); 667 668 CeedInt layout_qf[3]; 669 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 670 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 671 672 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 673 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 674 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 675 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 676 num_qpts]; // logically 3-tensor 677 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 678 CeedScalar elem_mat[elem_size * elem_size]; 679 int count = 0; 680 CeedScalar *vals; 681 ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 682 for (int e = 0; e < num_elem; ++e) { 683 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 684 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 685 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 686 B_mat_in[ell] = 0.0; 687 } 688 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 689 B_mat_out[ell] = 0.0; 690 } 691 // Store block-diagonal D matrix as collection of small dense blocks 692 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 693 D_mat[ell] = 0.0; 694 } 695 // form element matrix itself (for each block component) 696 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 697 elem_mat[ell] = 0.0; 698 } 699 for (int q = 0; q < num_qpts; ++q) { 700 for (int n = 0; n < elem_size; ++n) { 701 CeedInt d_in = -1; 702 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 703 const int qq = num_eval_mode_in*q; 704 if (eval_mode_in[e_in] == 
CEED_EVAL_INTERP) { 705 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 706 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 707 d_in += 1; 708 B_mat_in[(qq+e_in)*elem_size + n] += 709 grad_in[(d_in*num_qpts+q) * elem_size + n]; 710 } else { 711 // LCOV_EXCL_START 712 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 713 // LCOV_EXCL_STOP 714 } 715 } 716 CeedInt d_out = -1; 717 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 718 const int qq = num_eval_mode_out*q; 719 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 720 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 721 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 722 d_out += 1; 723 B_mat_out[(qq+e_out)*elem_size + n] += 724 grad_in[(d_out*num_qpts+q) * elem_size + n]; 725 } else { 726 // LCOV_EXCL_START 727 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 728 // LCOV_EXCL_STOP 729 } 730 } 731 } 732 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 733 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 734 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 735 +comp_out; 736 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 737 e*layout_qf[2]; 738 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 739 } 740 } 741 } 742 // Compute B^T*D 743 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 744 BTD[ell] = 0.0; 745 } 746 for (int j = 0; j<elem_size; ++j) { 747 for (int q = 0; q<num_qpts; ++q) { 748 int qq = num_eval_mode_out*q; 749 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 750 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 751 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 752 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 753 } 754 } 755 } 756 } 757 758 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 759 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 760 761 // put 
element matrix in coordinate data structure 762 for (int i = 0; i < elem_size; ++i) { 763 for (int j = 0; j < elem_size; ++j) { 764 vals[offset + count] = elem_mat[i*elem_size + j]; 765 count++; 766 } 767 } 768 } 769 } 770 } 771 if (count != local_num_entries) 772 // LCOV_EXCL_START 773 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 774 // LCOV_EXCL_STOP 775 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 776 777 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 778 CeedChk(ierr); 779 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 780 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 781 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 782 783 return CEED_ERROR_SUCCESS; 784 } 785 786 /** 787 @brief Count number of entries for assembled CeedOperator 788 789 @param[in] op CeedOperator to assemble 790 @param[out] num_entries Number of entries in assembled representation 791 792 @return An error code: 0 - success, otherwise - failure 793 794 @ref Utility 795 **/ 796 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 797 CeedInt *num_entries) { 798 int ierr; 799 CeedElemRestriction rstr; 800 CeedInt num_elem, elem_size, num_comp; 801 802 if (op->is_composite) 803 // LCOV_EXCL_START 804 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 805 "Composite operator not supported"); 806 // LCOV_EXCL_STOP 807 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 808 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 809 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 810 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 811 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 812 813 return CEED_ERROR_SUCCESS; 814 } 815 816 /** 817 @brief Common code for creating a multigrid coarse operator and level 818 transfer operators for a CeedOperator 819 820 @param[in] op_fine Fine grid operator 821 
@param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] basis_c_to_f  Basis for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
    // LCOV_EXCL_STOP

  // Coarse Grid
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields; the active field swaps to the coarse restriction/basis
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }

  // Multiplicity vector
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  // Invert entry-wise so the 'scale' field undoes gather/scatter multiplicity
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  // Context owns the component count; CEED_OWN_POINTER transfers ownership
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in] interp_1d   Interpolation matrix in one dimension
  @param[in] grad_1d     Gradient matrix in one dimension
  @param[in] q_weight_1d Quadrature weights in one dimension
  @param[in] P_1d        Number of basis nodes in one dimension
  @param[in] Q_1d        Number of quadrature points in one dimension
  @param[in] dim         Dimension of basis
  @param[out] mass       Assembled mass matrix in one dimension
  @param[out] laplace    Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass: M_ij = sum_k B_ki w_k B_kj
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian: L_ij = sum_k G_ki w_k G_kj
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Diagonal shift keeps the (singular) 1D Laplacian invertible
  CeedScalar perturbation = dim>2 ?
1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
    For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
    'v', provided in that order, would result in an assembled QFunction that
    consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
    on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedQFunction at
                          quadrature points
  @param[out] rstr      CeedElemRestriction for CeedVector containing assembled
                          CeedQFunction
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
                                        CeedElemRestriction *rstr,
                                        CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunction) {
    ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
           rstr, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
    composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.
@param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Backend only provides the additive variant; zero first, then add
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
    composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
                                          CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
    composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
@param request        Address of CeedRequest for non-blocking completion, else
                          CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Backend only provides the additive variant; zero first, then add
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
    composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
  @param request        Address of CeedRequest for non-blocking completion, else
                          CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.
@param[in] op           CeedOperator to assemble
  @param[out] num_entries Number of entries in coordinate nonzero pattern
  @param[out] rows        Row number for each entry
  @param[out] cols        Column number for each entry

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations; sub-operators write at a running offset
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in] op      CeedOperator to assemble
  @param[out] values Values to assemble into matrix

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // Values are written in the same running-offset order as the symbolic pass
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level
transfer operators
           for a CeedOperator, creating the prolongation basis from the
           fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
    // LCOV_EXCL_STOP

  // Coarse to fine basis
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    // Tensor bases: work with the 1D matrices
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c (back substitution on R)
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
    CeedOperator *op_prolong, CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1640 CeedInt Q_f, Q_c; 1641 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1642 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1643 if (Q_f != Q_c) 1644 // LCOV_EXCL_START 1645 return CeedError(ceed, CEED_ERROR_DIMENSION, 1646 "Bases must have compatible quadrature spaces"); 1647 // LCOV_EXCL_STOP 1648 1649 // Coarse to fine basis 1650 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1651 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1652 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1653 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1654 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1655 CeedChk(ierr); 1656 P_1d_c = dim == 1 ? num_nodes_c : 1657 dim == 2 ? sqrt(num_nodes_c) : 1658 cbrt(num_nodes_c); 1659 CeedScalar *q_ref, *q_weight, *grad; 1660 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1661 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1662 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1663 CeedBasis basis_c_to_f; 1664 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1665 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1666 CeedChk(ierr); 1667 ierr = CeedFree(&q_ref); CeedChk(ierr); 1668 ierr = CeedFree(&q_weight); CeedChk(ierr); 1669 ierr = CeedFree(&grad); CeedChk(ierr); 1670 1671 // Core code 1672 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1673 basis_coarse, basis_c_to_f, op_coarse, 1674 op_prolong, op_restrict); 1675 CeedChk(ierr); 1676 return CEED_ERROR_SUCCESS; 1677 } 1678 1679 /** 1680 @brief Create a multigrid coarse operator and level transfer operators 1681 for a CeedOperator with a non-tensor basis for the active vector 1682 1683 Note: Calling this function asserts that setup is complete 1684 and sets the CeedOperator as immutable. 
1685 1686 @param[in] op_fine Fine grid operator 1687 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1688 @param[in] rstr_coarse Coarse grid restriction 1689 @param[in] basis_coarse Coarse grid active vector basis 1690 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1691 @param[out] op_coarse Coarse grid operator 1692 @param[out] op_prolong Coarse to fine operator 1693 @param[out] op_restrict Fine to coarse operator 1694 1695 @return An error code: 0 - success, otherwise - failure 1696 1697 @ref User 1698 **/ 1699 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1700 CeedVector p_mult_fine, 1701 CeedElemRestriction rstr_coarse, 1702 CeedBasis basis_coarse, 1703 const CeedScalar *interp_c_to_f, 1704 CeedOperator *op_coarse, 1705 CeedOperator *op_prolong, 1706 CeedOperator *op_restrict) { 1707 int ierr; 1708 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1709 Ceed ceed; 1710 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1711 1712 // Check for compatible quadrature spaces 1713 CeedBasis basis_fine; 1714 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1715 CeedInt Q_f, Q_c; 1716 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1717 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1718 if (Q_f != Q_c) 1719 // LCOV_EXCL_START 1720 return CeedError(ceed, CEED_ERROR_DIMENSION, 1721 "Bases must have compatible quadrature spaces"); 1722 // LCOV_EXCL_STOP 1723 1724 // Coarse to fine basis 1725 CeedElemTopology topo; 1726 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1727 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1728 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1729 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1730 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 1731 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1732 
CeedChk(ierr); 1733 CeedScalar *q_ref, *q_weight, *grad; 1734 ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr); 1735 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 1736 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 1737 CeedBasis basis_c_to_f; 1738 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 1739 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1740 CeedChk(ierr); 1741 ierr = CeedFree(&q_ref); CeedChk(ierr); 1742 ierr = CeedFree(&q_weight); CeedChk(ierr); 1743 ierr = CeedFree(&grad); CeedChk(ierr); 1744 1745 // Core code 1746 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1747 basis_coarse, basis_c_to_f, op_coarse, 1748 op_prolong, op_restrict); 1749 CeedChk(ierr); 1750 return CEED_ERROR_SUCCESS; 1751 } 1752 1753 /** 1754 @brief Build a FDM based approximate inverse for each element for a 1755 CeedOperator 1756 1757 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 1758 Method based approximate inverse. This function obtains the simultaneous 1759 diagonalization for the 1D mass and Laplacian operators, 1760 M = V^T V, K = V^T S V. 1761 The assembled QFunction is used to modify the eigenvalues from simultaneous 1762 diagonalization and obtain an approximate inverse of the form 1763 V^T S^hat V. The CeedOperator must be linear and non-composite. The 1764 associated CeedQFunction must therefore also be linear. 1765 1766 Note: Calling this function asserts that setup is complete 1767 and sets the CeedOperator as immutable. 
1768 1769 @param op CeedOperator to create element inverses 1770 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 1771 for each element 1772 @param request Address of CeedRequest for non-blocking completion, else 1773 @ref CEED_REQUEST_IMMEDIATE 1774 1775 @return An error code: 0 - success, otherwise - failure 1776 1777 @ref Backend 1778 **/ 1779 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 1780 CeedRequest *request) { 1781 int ierr; 1782 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1783 1784 // Use backend version, if available 1785 if (op->CreateFDMElementInverse) { 1786 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 1787 return CEED_ERROR_SUCCESS; 1788 } else { 1789 // Check for valid fallback resource 1790 const char *resource, *fallback_resource; 1791 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1792 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1793 CeedChk(ierr); 1794 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1795 // Fallback to reference Ceed 1796 if (!op->op_fallback) { 1797 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1798 } 1799 // Assemble 1800 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 1801 CeedChk(ierr); 1802 return CEED_ERROR_SUCCESS; 1803 } 1804 } 1805 1806 // Interface implementation 1807 Ceed ceed, ceed_parent; 1808 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1809 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 1810 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 1811 CeedQFunction qf; 1812 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 1813 1814 // Determine active input basis 1815 bool interp = false, grad = false; 1816 CeedBasis basis = NULL; 1817 CeedElemRestriction rstr = NULL; 1818 CeedOperatorField *op_fields; 1819 CeedQFunctionField *qf_fields; 1820 CeedInt num_input_fields; 1821 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 1822 CeedChk(ierr); 1823 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 1824 for (CeedInt i=0; i<num_input_fields; i++) { 1825 CeedVector vec; 1826 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 1827 if (vec == CEED_VECTOR_ACTIVE) { 1828 CeedEvalMode eval_mode; 1829 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 1830 interp = interp || eval_mode == CEED_EVAL_INTERP; 1831 grad = grad || eval_mode == CEED_EVAL_GRAD; 1832 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 1833 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 1834 } 1835 } 1836 if (!basis) 1837 // LCOV_EXCL_START 1838 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 1839 // LCOV_EXCL_STOP 1840 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1, 1841 l_size = 1; 1842 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 1843 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 1844 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 1845 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 1846 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 1847 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 1848 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 1849 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 1850 1851 // Build and diagonalize 1D Mass and Laplacian 1852 bool 
tensor_basis; 1853 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 1854 if (!tensor_basis) 1855 // LCOV_EXCL_START 1856 return CeedError(ceed, CEED_ERROR_BACKEND, 1857 "FDMElementInverse only supported for tensor " 1858 "bases"); 1859 // LCOV_EXCL_STOP 1860 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 1861 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 1862 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 1863 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 1864 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 1865 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 1866 // -- Build matrices 1867 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 1868 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 1869 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 1870 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 1871 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 1872 mass, laplace); CeedChk(ierr); 1873 1874 // -- Diagonalize 1875 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 1876 CeedChk(ierr); 1877 ierr = CeedFree(&mass); CeedChk(ierr); 1878 ierr = CeedFree(&laplace); CeedChk(ierr); 1879 for (CeedInt i=0; i<P_1d; i++) 1880 for (CeedInt j=0; j<P_1d; j++) 1881 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 1882 ierr = CeedFree(&x); CeedChk(ierr); 1883 1884 // Assemble QFunction 1885 CeedVector assembled; 1886 CeedElemRestriction rstr_qf; 1887 ierr = CeedOperatorLinearAssembleQFunction(op, &assembled, &rstr_qf, request); 1888 CeedChk(ierr); 1889 CeedInt layout[3]; 1890 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 1891 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 1892 CeedScalar max_norm = 0; 1893 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 1894 1895 // Calculate element averages 1896 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 1897 CeedScalar *elem_avg; 1898 const 
CeedScalar *assembled_array, *q_weight_array; 1899 CeedVector q_weight; 1900 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 1901 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 1902 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 1903 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 1904 CeedChk(ierr); 1905 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 1906 CeedChk(ierr); 1907 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 1908 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 1909 for (CeedInt e=0; e<num_elem; e++) { 1910 CeedInt count = 0; 1911 for (CeedInt q=0; q<num_qpts; q++) 1912 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 1913 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 1914 qf_value_bound) { 1915 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 1916 q_weight_array[q]; 1917 count++; 1918 } 1919 if (count) { 1920 elem_avg[e] /= count; 1921 } else { 1922 elem_avg[e] = 1.0; 1923 } 1924 } 1925 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 1926 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 1927 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 1928 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 1929 1930 // Build FDM diagonal 1931 CeedVector q_data; 1932 CeedScalar *q_data_array, *fdm_diagonal; 1933 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 1934 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 1935 for (CeedInt c=0; c<num_comp; c++) 1936 for (CeedInt n=0; n<elem_size; n++) { 1937 if (interp) 1938 fdm_diagonal[c*elem_size + n] = 1.0; 1939 if (grad) 1940 for (CeedInt d=0; d<dim; d++) { 1941 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 1942 fdm_diagonal[c*elem_size + n] += lambda[i]; 1943 } 1944 if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound) 1945 
fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 1946 } 1947 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 1948 CeedChk(ierr); 1949 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 1950 ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr); 1951 for (CeedInt e=0; e<num_elem; e++) 1952 for (CeedInt c=0; c<num_comp; c++) 1953 for (CeedInt n=0; n<elem_size; n++) 1954 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 1955 fdm_diagonal[c*elem_size + n]); 1956 ierr = CeedFree(&elem_avg); CeedChk(ierr); 1957 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 1958 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 1959 1960 // Setup FDM operator 1961 // -- Basis 1962 CeedBasis fdm_basis; 1963 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 1964 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 1965 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 1966 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 1967 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 1968 fdm_interp, grad_dummy, q_ref_dummy, 1969 q_weight_dummy, &fdm_basis); CeedChk(ierr); 1970 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 1971 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 1972 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 1973 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 1974 ierr = CeedFree(&lambda); CeedChk(ierr); 1975 1976 // -- Restriction 1977 CeedElemRestriction rstr_qd_i; 1978 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 1979 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 1980 num_comp, num_elem*num_comp*elem_size, 1981 strides, &rstr_qd_i); CeedChk(ierr); 1982 // -- QFunction 1983 CeedQFunction qf_fdm; 1984 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 1985 CeedChk(ierr); 1986 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 1987 CeedChk(ierr); 1988 ierr = 
CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 1989 CeedChk(ierr); 1990 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 1991 CeedChk(ierr); 1992 // -- QFunction context 1993 CeedInt *num_comp_data; 1994 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 1995 num_comp_data[0] = num_comp; 1996 CeedQFunctionContext ctx_fdm; 1997 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 1998 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 1999 sizeof(*num_comp_data), num_comp_data); 2000 CeedChk(ierr); 2001 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2002 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2003 // -- Operator 2004 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2005 CeedChk(ierr); 2006 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2007 CeedChk(ierr); 2008 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2009 q_data); CeedChk(ierr); 2010 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2011 CeedChk(ierr); 2012 2013 // Cleanup 2014 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2015 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2016 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2017 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2018 2019 return CEED_ERROR_SUCCESS; 2020 } 2021 2022 /// @} 2023