1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->interface_setup = false; 75 op_ref->backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 op->op_fallback = op_ref; 79 80 
// Clone QF 81 CeedQFunction qf_ref; 82 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 83 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 84 qf_ref->data = NULL; 85 qf_ref->ceed = ceed_ref; 86 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 87 op_ref->qf = qf_ref; 88 op->qf_fallback = qf_ref; 89 return CEED_ERROR_SUCCESS; 90 } 91 92 /** 93 @brief Select correct basis matrix pointer based on CeedEvalMode 94 95 @param[in] eval_mode Current basis evaluation mode 96 @param[in] identity Pointer to identity matrix 97 @param[in] interp Pointer to interpolation matrix 98 @param[in] grad Pointer to gradient matrix 99 @param[out] basis_ptr Basis pointer to set 100 101 @return none 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 CeedInt 
num_elem, num_comp, elem_size, comp_stride, max = 1, 148 *pointblock_offsets; 149 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 151 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 152 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 153 CeedInt shift = num_comp; 154 if (comp_stride != 1) 155 shift *= num_comp; 156 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 157 CeedChk(ierr); 158 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 159 pointblock_offsets[i] = offsets[i]*shift; 160 if (pointblock_offsets[i] > max) 161 max = pointblock_offsets[i]; 162 } 163 164 // Create new restriction 165 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 166 1, max + num_comp*num_comp, CEED_MEM_HOST, 167 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 168 CeedChk(ierr); 169 170 // Cleanup 171 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 172 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else 181 CEED_REQUEST_IMMEDIATE 182 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 183 @param[out] assembled CeedVector to store assembled diagonal 184 185 @return An error code: 0 - success, otherwise - failure 186 187 @ref Developer 188 **/ 189 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 190 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 191 int ierr; 192 Ceed ceed; 193 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 194 195 // Assemble QFunction 196 CeedQFunction qf; 197 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 198 CeedInt 
num_input_fields, num_output_fields; 199 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 200 CeedChk(ierr); 201 CeedVector assembled_qf; 202 CeedElemRestriction rstr; 203 ierr = CeedOperatorLinearAssembleQFunction(op, &assembled_qf, &rstr, request); 204 CeedChk(ierr); 205 CeedInt layout[3]; 206 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 207 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 208 CeedScalar max_norm = 0; 209 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 210 211 // Determine active input basis 212 CeedOperatorField *op_fields; 213 CeedQFunctionField *qf_fields; 214 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 215 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 216 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 217 CeedEvalMode *eval_mode_in = NULL; 218 CeedBasis basis_in = NULL; 219 CeedElemRestriction rstr_in = NULL; 220 for (CeedInt i=0; i<num_input_fields; i++) { 221 CeedVector vec; 222 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 223 if (vec == CEED_VECTOR_ACTIVE) { 224 CeedElemRestriction rstr; 225 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 226 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 227 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 228 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 229 if (rstr_in && rstr_in != rstr) 230 // LCOV_EXCL_START 231 return CeedError(ceed, CEED_ERROR_BACKEND, 232 "Multi-field non-composite operator diagonal assembly not supported"); 233 // LCOV_EXCL_STOP 234 rstr_in = rstr; 235 CeedEvalMode eval_mode; 236 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 237 switch (eval_mode) { 238 case CEED_EVAL_NONE: 239 case CEED_EVAL_INTERP: 240 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 
241 eval_mode_in[num_eval_mode_in] = eval_mode; 242 num_eval_mode_in += 1; 243 break; 244 case CEED_EVAL_GRAD: 245 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 246 for (CeedInt d=0; d<dim; d++) 247 eval_mode_in[num_eval_mode_in+d] = eval_mode; 248 num_eval_mode_in += dim; 249 break; 250 case CEED_EVAL_WEIGHT: 251 case CEED_EVAL_DIV: 252 case CEED_EVAL_CURL: 253 break; // Caught by QF Assembly 254 } 255 } 256 } 257 258 // Determine active output basis 259 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 260 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 261 CeedInt num_eval_mode_out = 0; 262 CeedEvalMode *eval_mode_out = NULL; 263 CeedBasis basis_out = NULL; 264 CeedElemRestriction rstr_out = NULL; 265 for (CeedInt i=0; i<num_output_fields; i++) { 266 CeedVector vec; 267 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 268 if (vec == CEED_VECTOR_ACTIVE) { 269 CeedElemRestriction rstr; 270 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 271 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 272 if (rstr_out && rstr_out != rstr) 273 // LCOV_EXCL_START 274 return CeedError(ceed, CEED_ERROR_BACKEND, 275 "Multi-field non-composite operator diagonal assembly not supported"); 276 // LCOV_EXCL_STOP 277 rstr_out = rstr; 278 CeedEvalMode eval_mode; 279 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 280 switch (eval_mode) { 281 case CEED_EVAL_NONE: 282 case CEED_EVAL_INTERP: 283 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 284 eval_mode_out[num_eval_mode_out] = eval_mode; 285 num_eval_mode_out += 1; 286 break; 287 case CEED_EVAL_GRAD: 288 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 289 for (CeedInt d=0; d<dim; d++) 290 eval_mode_out[num_eval_mode_out+d] = eval_mode; 291 num_eval_mode_out += dim; 292 break; 293 case 
CEED_EVAL_WEIGHT: 294 case CEED_EVAL_DIV: 295 case CEED_EVAL_CURL: 296 break; // Caught by QF Assembly 297 } 298 } 299 } 300 301 // Assemble point block diagonal restriction, if needed 302 CeedElemRestriction diag_rstr = rstr_out; 303 if (is_pointblock) { 304 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 305 CeedChk(ierr); 306 } 307 308 // Create diagonal vector 309 CeedVector elem_diag; 310 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 311 CeedChk(ierr); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array, *assembled_qf_array; 315 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 316 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 317 CeedChk(ierr); 318 ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 319 CeedChk(ierr); 320 CeedInt num_elem, num_nodes, num_qpts; 321 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 322 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 323 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 324 // Basis matrices 325 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 326 CeedScalar *identity = NULL; 327 bool evalNone = false; 328 for (CeedInt i=0; i<num_eval_mode_in; i++) 329 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 330 for (CeedInt i=0; i<num_eval_mode_out; i++) 331 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 332 if (evalNone) { 333 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 334 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 335 identity[i*num_nodes+i] = 1.0; 336 } 337 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 338 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 339 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 340 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 341 // 
Compute the diagonal of B^T D B 342 // Each element 343 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 344 for (CeedInt e=0; e<num_elem; e++) { 345 CeedInt d_out = -1; 346 // Each basis eval mode pair 347 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 348 const CeedScalar *bt = NULL; 349 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 350 d_out += 1; 351 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 352 &grad_out[d_out*num_qpts*num_nodes], &bt); 353 CeedInt d_in = -1; 354 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 355 const CeedScalar *b = NULL; 356 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 357 d_in += 1; 358 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 359 &grad_in[d_in*num_qpts*num_nodes], &b); 360 // Each component 361 for (CeedInt c_out=0; c_out<num_comp; c_out++) 362 // Each qpoint/node pair 363 for (CeedInt q=0; q<num_qpts; q++) 364 if (is_pointblock) { 365 // Point Block Diagonal 366 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 367 const CeedScalar qf_value = 368 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 369 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 370 if (fabs(qf_value) > qf_value_bound) 371 for (CeedInt n=0; n<num_nodes; n++) 372 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 373 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 374 } 375 } else { 376 // Diagonal Only 377 const CeedScalar qf_value = 378 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 379 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 380 if (fabs(qf_value) > qf_value_bound) 381 for (CeedInt n=0; n<num_nodes; n++) 382 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 383 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 384 } 385 } 386 } 387 } 388 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 389 ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array); CeedChk(ierr); 390 391 // 
Assemble local operator diagonal 392 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 393 assembled, request); CeedChk(ierr); 394 395 // Cleanup 396 if (is_pointblock) { 397 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 398 } 399 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 400 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 401 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 402 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 403 ierr = CeedFree(&identity); CeedChk(ierr); 404 405 return CEED_ERROR_SUCCESS; 406 } 407 408 /** 409 @brief Core logic for assembling composite operator diagonal 410 411 @param[in] op CeedOperator to assemble point block diagonal 412 @param[in] request Address of CeedRequest for non-blocking completion, else 413 CEED_REQUEST_IMMEDIATE 414 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 415 @param[out] assembled CeedVector to store assembled diagonal 416 417 @return An error code: 0 - success, otherwise - failure 418 419 @ref Developer 420 **/ 421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 422 CeedOperator op, CeedRequest *request, const bool is_pointblock, 423 CeedVector assembled) { 424 int ierr; 425 CeedInt num_sub; 426 CeedOperator *suboperators; 427 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 428 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 429 for (CeedInt i = 0; i < num_sub; i++) { 430 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 431 is_pointblock, assembled); CeedChk(ierr); 432 } 433 return CEED_ERROR_SUCCESS; 434 } 435 436 /** 437 @brief Build nonzero pattern for non-composite operator 438 439 Users should generally use CeedOperatorLinearAssembleSymbolic() 440 441 @param[in] op CeedOperator to assemble nonzero pattern 442 @param[in] offset Offset for number of entries 443 @param[out] rows Row number for each entry 444 @param[out] cols Column number for each entry 
445 446 @return An error code: 0 - success, otherwise - failure 447 448 @ref Developer 449 **/ 450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 451 CeedInt *rows, CeedInt *cols) { 452 int ierr; 453 Ceed ceed = op->ceed; 454 if (op->composite) 455 // LCOV_EXCL_START 456 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 457 "Composite operator not supported"); 458 // LCOV_EXCL_STOP 459 460 CeedElemRestriction rstr_in; 461 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 462 CeedInt num_elem, elem_size, num_nodes, num_comp; 463 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 464 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 465 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 466 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 467 CeedInt layout_er[3]; 468 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 469 470 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 471 472 // Determine elem_dof relation 473 CeedVector index_vec; 474 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 475 CeedScalar *array; 476 ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 477 for (CeedInt i = 0; i < num_nodes; ++i) { 478 array[i] = i; 479 } 480 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 481 CeedVector elem_dof; 482 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 483 CeedChk(ierr); 484 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 485 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 486 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 487 const CeedScalar *elem_dof_a; 488 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 489 CeedChk(ierr); 490 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 491 492 // Determine i, j 
locations for element matrices 493 CeedInt count = 0; 494 for (int e = 0; e < num_elem; ++e) { 495 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 496 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 497 for (int i = 0; i < elem_size; ++i) { 498 for (int j = 0; j < elem_size; ++j) { 499 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 500 (comp_out)*layout_er[1] + e*layout_er[2]; 501 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 502 (comp_in)*layout_er[1] + e*layout_er[2]; 503 504 const CeedInt row = elem_dof_a[elem_dof_index_row]; 505 const CeedInt col = elem_dof_a[elem_dof_index_col]; 506 507 rows[offset + count] = row; 508 cols[offset + count] = col; 509 count++; 510 } 511 } 512 } 513 } 514 } 515 if (count != local_num_entries) 516 // LCOV_EXCL_START 517 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 // LCOV_EXCL_STOP 519 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 520 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 521 522 return CEED_ERROR_SUCCESS; 523 } 524 525 /** 526 @brief Assemble nonzero entries for non-composite operator 527 528 Users should generally use CeedOperatorLinearAssemble() 529 530 @param[in] op CeedOperator to assemble 531 @param[out] offset Offest for number of entries 532 @param[out] values Values to assemble into matrix 533 534 @return An error code: 0 - success, otherwise - failure 535 536 @ref Developer 537 **/ 538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 539 CeedVector values) { 540 int ierr; 541 Ceed ceed = op->ceed; 542 if (op->composite) 543 // LCOV_EXCL_START 544 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 545 "Composite operator not supported"); 546 // LCOV_EXCL_STOP 547 548 // Assemble QFunction 549 CeedQFunction qf; 550 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 551 CeedVector assembled_qf; 552 CeedElemRestriction rstr_q; 553 ierr = CeedOperatorLinearAssembleQFunction( 554 op, 
&assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 555 556 CeedInt qf_length; 557 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 558 559 CeedInt num_input_fields, num_output_fields; 560 CeedOperatorField *input_fields; 561 CeedOperatorField *output_fields; 562 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 563 &num_output_fields, &output_fields); CeedChk(ierr); 564 565 // Determine active input basis 566 CeedQFunctionField *qf_fields; 567 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 568 CeedInt num_eval_mode_in = 0, dim = 1; 569 CeedEvalMode *eval_mode_in = NULL; 570 CeedBasis basis_in = NULL; 571 CeedElemRestriction rstr_in = NULL; 572 for (CeedInt i=0; i<num_input_fields; i++) { 573 CeedVector vec; 574 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 575 if (vec == CEED_VECTOR_ACTIVE) { 576 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 577 CeedChk(ierr); 578 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 579 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 580 CeedChk(ierr); 581 CeedEvalMode eval_mode; 582 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 583 CeedChk(ierr); 584 switch (eval_mode) { 585 case CEED_EVAL_NONE: 586 case CEED_EVAL_INTERP: 587 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 588 eval_mode_in[num_eval_mode_in] = eval_mode; 589 num_eval_mode_in += 1; 590 break; 591 case CEED_EVAL_GRAD: 592 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 593 for (CeedInt d=0; d<dim; d++) { 594 eval_mode_in[num_eval_mode_in+d] = eval_mode; 595 } 596 num_eval_mode_in += dim; 597 break; 598 case CEED_EVAL_WEIGHT: 599 case CEED_EVAL_DIV: 600 case CEED_EVAL_CURL: 601 break; // Caught by QF Assembly 602 } 603 } 604 } 605 606 // Determine active output basis 607 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 
CeedChk(ierr); 608 CeedInt num_eval_mode_out = 0; 609 CeedEvalMode *eval_mode_out = NULL; 610 CeedBasis basis_out = NULL; 611 CeedElemRestriction rstr_out = NULL; 612 for (CeedInt i=0; i<num_output_fields; i++) { 613 CeedVector vec; 614 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 615 if (vec == CEED_VECTOR_ACTIVE) { 616 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 617 CeedChk(ierr); 618 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 619 CeedChk(ierr); 620 CeedEvalMode eval_mode; 621 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 622 CeedChk(ierr); 623 switch (eval_mode) { 624 case CEED_EVAL_NONE: 625 case CEED_EVAL_INTERP: 626 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 627 eval_mode_out[num_eval_mode_out] = eval_mode; 628 num_eval_mode_out += 1; 629 break; 630 case CEED_EVAL_GRAD: 631 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 632 for (CeedInt d=0; d<dim; d++) { 633 eval_mode_out[num_eval_mode_out+d] = eval_mode; 634 } 635 num_eval_mode_out += dim; 636 break; 637 case CEED_EVAL_WEIGHT: 638 case CEED_EVAL_DIV: 639 case CEED_EVAL_CURL: 640 break; // Caught by QF Assembly 641 } 642 } 643 } 644 645 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 646 // LCOV_EXCL_START 647 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 648 "Cannot assemble operator with out inputs/outputs"); 649 // LCOV_EXCL_STOP 650 651 CeedInt num_elem, elem_size, num_qpts, num_comp; 652 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 653 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 654 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 655 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 656 657 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 658 659 // loop over elements and put in data 
structure 660 const CeedScalar *interp_in, *grad_in; 661 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 662 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 663 664 const CeedScalar *assembled_qf_array; 665 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 666 CeedChk(ierr); 667 668 CeedInt layout_qf[3]; 669 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 670 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 671 672 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 673 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 674 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 675 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 676 num_qpts]; // logically 3-tensor 677 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 678 CeedScalar elem_mat[elem_size * elem_size]; 679 int count = 0; 680 CeedScalar *vals; 681 ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 682 for (int e = 0; e < num_elem; ++e) { 683 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 684 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 685 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 686 B_mat_in[ell] = 0.0; 687 } 688 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 689 B_mat_out[ell] = 0.0; 690 } 691 // Store block-diagonal D matrix as collection of small dense blocks 692 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 693 D_mat[ell] = 0.0; 694 } 695 // form element matrix itself (for each block component) 696 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 697 elem_mat[ell] = 0.0; 698 } 699 for (int q = 0; q < num_qpts; ++q) { 700 for (int n = 0; n < elem_size; ++n) { 701 CeedInt d_in = -1; 702 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 703 const int qq = num_eval_mode_in*q; 704 if (eval_mode_in[e_in] == 
CEED_EVAL_INTERP) { 705 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 706 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 707 d_in += 1; 708 B_mat_in[(qq+e_in)*elem_size + n] += 709 grad_in[(d_in*num_qpts+q) * elem_size + n]; 710 } else { 711 // LCOV_EXCL_START 712 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 713 // LCOV_EXCL_STOP 714 } 715 } 716 CeedInt d_out = -1; 717 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 718 const int qq = num_eval_mode_out*q; 719 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 720 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 721 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 722 d_out += 1; 723 B_mat_out[(qq+e_out)*elem_size + n] += 724 grad_in[(d_out*num_qpts+q) * elem_size + n]; 725 } else { 726 // LCOV_EXCL_START 727 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 728 // LCOV_EXCL_STOP 729 } 730 } 731 } 732 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 733 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 734 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 735 +comp_out; 736 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 737 e*layout_qf[2]; 738 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 739 } 740 } 741 } 742 // Compute B^T*D 743 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 744 BTD[ell] = 0.0; 745 } 746 for (int j = 0; j<elem_size; ++j) { 747 for (int q = 0; q<num_qpts; ++q) { 748 int qq = num_eval_mode_out*q; 749 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 750 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 751 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 752 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 753 } 754 } 755 } 756 } 757 758 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 759 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 760 761 // put 
element matrix in coordinate data structure 762 for (int i = 0; i < elem_size; ++i) { 763 for (int j = 0; j < elem_size; ++j) { 764 vals[offset + count] = elem_mat[i*elem_size + j]; 765 count++; 766 } 767 } 768 } 769 } 770 } 771 if (count != local_num_entries) 772 // LCOV_EXCL_START 773 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 774 // LCOV_EXCL_STOP 775 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 776 777 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 778 CeedChk(ierr); 779 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 780 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 781 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 782 783 return CEED_ERROR_SUCCESS; 784 } 785 786 /** 787 @brief Count number of entries for assembled CeedOperator 788 789 @param[in] op CeedOperator to assemble 790 @param[out] num_entries Number of entries in assembled representation 791 792 @return An error code: 0 - success, otherwise - failure 793 794 @ref Utility 795 **/ 796 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 797 CeedInt *num_entries) { 798 int ierr; 799 CeedElemRestriction rstr; 800 CeedInt num_elem, elem_size, num_comp; 801 802 if (op->composite) 803 // LCOV_EXCL_START 804 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 805 "Composite operator not supported"); 806 // LCOV_EXCL_STOP 807 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 808 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 809 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 810 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 811 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 812 813 return CEED_ERROR_SUCCESS; 814 } 815 816 /** 817 @brief Common code for creating a multigrid coarse operator and level 818 transfer operators for a CeedOperator 819 820 @param[in] op_fine Fine grid operator 821 @param[in] 
p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] basis_c_to_f  Basis for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid: same QFunction as the fine operator, with the active field
  //   swapped to the coarse restriction/basis
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      // Remember the fine-grid active restriction for the transfer operators
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }

  // Multiplicity vector: gather/scatter of p_mult_fine counts node sharing;
  //   the reciprocal is used to scale transfers so shared nodes are not
  //   double-counted
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction: fine-to-coarse transfer built from the "Scale" QFunction
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  // Context holds the component count; ownership transfers to the context
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation: coarse-to-fine transfer, mirror of the restriction
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup: operators above hold their own references
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in] interp_1d    Interpolation matrix in one dimension
  @param[in] grad_1d      Gradient matrix in one dimension
  @param[in] q_weight_1d  Quadrature weights in one dimension
  @param[in] P_1d         Number of basis nodes in one dimension
  @param[in] Q_1d         Number of quadrature points in one dimension
  @param[in] dim          Dimension of basis
  @param[out] mass        Assembled mass matrix in one dimension
  @param[out] laplace     Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass: mass = interp_1d^T diag(q_weight_1d) interp_1d
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian: laplace = grad_1d^T diag(q_weight_1d) grad_1d
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Diagonal perturbation keeps the (otherwise singular) Laplacian invertible
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
    For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
    'v', provided in that order, would result in an assembled QFunction that
    consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
    on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1038 1039 @param op CeedOperator to assemble CeedQFunction 1040 @param[out] assembled CeedVector to store assembled CeedQFunction at 1041 quadrature points 1042 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1043 CeedQFunction 1044 @param request Address of CeedRequest for non-blocking completion, else 1045 @ref CEED_REQUEST_IMMEDIATE 1046 1047 @return An error code: 0 - success, otherwise - failure 1048 1049 @ref User 1050 **/ 1051 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1052 CeedElemRestriction *rstr, 1053 CeedRequest *request) { 1054 int ierr; 1055 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1056 1057 // Backend version 1058 if (op->LinearAssembleQFunction) { 1059 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); CeedChk(ierr); 1060 return CEED_ERROR_SUCCESS; 1061 } else { 1062 // Fallback to reference Ceed 1063 if (!op->op_fallback) { 1064 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1065 } 1066 // Assemble 1067 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1068 rstr, request); CeedChk(ierr); 1069 return CEED_ERROR_SUCCESS; 1070 } 1071 } 1072 1073 /** 1074 @brief Assemble the diagonal of a square linear CeedOperator 1075 1076 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1077 1078 Note: Currently only non-composite CeedOperators with a single field and 1079 composite CeedOperators with single field sub-operators are supported. 
1080 1081 @param op CeedOperator to assemble CeedQFunction 1082 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1083 @param request Address of CeedRequest for non-blocking completion, else 1084 @ref CEED_REQUEST_IMMEDIATE 1085 1086 @return An error code: 0 - success, otherwise - failure 1087 1088 @ref User 1089 **/ 1090 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, 1091 CeedRequest *request) { 1092 int ierr; 1093 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1094 1095 // Use backend version, if available 1096 if (op->LinearAssembleDiagonal) { 1097 ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); 1098 return CEED_ERROR_SUCCESS; 1099 } else if (op->LinearAssembleAddDiagonal) { 1100 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1101 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1102 return CEED_ERROR_SUCCESS; 1103 } else { 1104 // Check for valid fallback resource 1105 const char *resource, *fallback_resource; 1106 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1107 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1108 CeedChk(ierr); 1109 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1110 // Fallback to reference Ceed 1111 if (!op->op_fallback) { 1112 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1113 } 1114 // Assemble 1115 ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request); 1116 CeedChk(ierr); 1117 return CEED_ERROR_SUCCESS; 1118 } 1119 } 1120 1121 // Default interface implementation 1122 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1123 ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); 1124 CeedChk(ierr); 1125 return CEED_ERROR_SUCCESS; 1126 } 1127 1128 /** 1129 @brief Assemble the diagonal of a square linear CeedOperator 1130 1131 This sums into a CeedVector the diagonal of a linear CeedOperator. 

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
                                          CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource (non-empty and not already in use)
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation; third argument selects diagonal-only
  //   (false) vs point-block (true) assembly
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
  @param request        Address of CeedRequest for non-blocking completion, else
                          CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Backend has only the additive variant; zero first, then add
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource (non-empty and not already in use)
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation: zero, then sum in the point block diagonal
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
  @param request        Address of CeedRequest for non-blocking completion, else
                          CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource (non-empty and not already in use)
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation; third argument true selects
  //   point-block (rather than scalar diagonal) assembly
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  @param[in]  op          CeedOperator to assemble
  @param[out] num_entries Number of entries in coordinate nonzero pattern
  @param[out] rows        Row number for each entry
  @param[out] cols        Column number for each entry

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource (non-empty and not already in use)
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // First pass: count entries and allocate rows, cols arrays
  //   (caller owns the returned arrays)
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // Second pass: fill in nonzero locations, each sub-operator writing at its
  //   running offset so the ordering matches CeedOperatorLinearAssemble()
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
  This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  @param[in]  op     CeedOperator to assemble
  @param[out] values Values to assemble into matrix

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource (non-empty and not already in use)
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // Each sub-operator writes at its running offset, matching the ordering
  //   produced by CeedOperatorLinearAssembleSymbolic()
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator, creating the prolongation basis from the
           fine and coarse grid interpolation

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis; for tensor bases work in 1D, otherwise on full nodes
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Solve the least-squares problem interp_f * interp_c_to_f = interp_c
  //   via QR of interp_f
  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c (back substitution)
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active basis

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
    CeedOperator *op_prolong, CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  // NOTE(review): float-to-int truncation of sqrt/cbrt here assumes the
  //   results are exact for perfect squares/cubes; e.g. cbrt(n) slightly
  //   below an integer would truncate down - confirm rounding is not needed
  P_1d_c = dim == 1 ? num_nodes_c :
           dim == 2 ? sqrt(num_nodes_c) :
           cbrt(num_nodes_c);
  // Placeholder quadrature data/gradient; this basis is only used for
  //   interpolation in the transfer operators
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
                                 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a non-tensor basis for the active vector

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
@param[in] interp_c_to_f Matrix for coarse to fine interpolation 1659 @param[out] op_coarse Coarse grid operator 1660 @param[out] op_prolong Coarse to fine operator 1661 @param[out] op_restrict Fine to coarse operator 1662 1663 @return An error code: 0 - success, otherwise - failure 1664 1665 @ref User 1666 **/ 1667 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1668 CeedVector p_mult_fine, 1669 CeedElemRestriction rstr_coarse, 1670 CeedBasis basis_coarse, 1671 const CeedScalar *interp_c_to_f, 1672 CeedOperator *op_coarse, 1673 CeedOperator *op_prolong, 1674 CeedOperator *op_restrict) { 1675 int ierr; 1676 Ceed ceed; 1677 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1678 1679 // Check for compatible quadrature spaces 1680 CeedBasis basis_fine; 1681 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1682 CeedInt Q_f, Q_c; 1683 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1684 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1685 if (Q_f != Q_c) 1686 // LCOV_EXCL_START 1687 return CeedError(ceed, CEED_ERROR_DIMENSION, 1688 "Bases must have compatible quadrature spaces"); 1689 // LCOV_EXCL_STOP 1690 1691 // Coarse to fine basis 1692 CeedElemTopology topo; 1693 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1694 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1695 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1696 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1697 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 1698 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1699 CeedChk(ierr); 1700 CeedScalar *q_ref, *q_weight, *grad; 1701 ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr); 1702 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 1703 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 1704 CeedBasis basis_c_to_f; 1705 ierr = 
CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 1706 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1707 CeedChk(ierr); 1708 ierr = CeedFree(&q_ref); CeedChk(ierr); 1709 ierr = CeedFree(&q_weight); CeedChk(ierr); 1710 ierr = CeedFree(&grad); CeedChk(ierr); 1711 1712 // Core code 1713 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1714 basis_coarse, basis_c_to_f, op_coarse, 1715 op_prolong, op_restrict); 1716 CeedChk(ierr); 1717 return CEED_ERROR_SUCCESS; 1718 } 1719 1720 /** 1721 @brief Build a FDM based approximate inverse for each element for a 1722 CeedOperator 1723 1724 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 1725 Method based approximate inverse. This function obtains the simultaneous 1726 diagonalization for the 1D mass and Laplacian operators, 1727 M = V^T V, K = V^T S V. 1728 The assembled QFunction is used to modify the eigenvalues from simultaneous 1729 diagonalization and obtain an approximate inverse of the form 1730 V^T S^hat V. The CeedOperator must be linear and non-composite. The 1731 associated CeedQFunction must therefore also be linear. 
1732 1733 @param op CeedOperator to create element inverses 1734 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 1735 for each element 1736 @param request Address of CeedRequest for non-blocking completion, else 1737 @ref CEED_REQUEST_IMMEDIATE 1738 1739 @return An error code: 0 - success, otherwise - failure 1740 1741 @ref Backend 1742 **/ 1743 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 1744 CeedRequest *request) { 1745 int ierr; 1746 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1747 1748 // Use backend version, if available 1749 if (op->CreateFDMElementInverse) { 1750 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 1751 return CEED_ERROR_SUCCESS; 1752 } else { 1753 // Check for valid fallback resource 1754 const char *resource, *fallback_resource; 1755 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1756 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1757 CeedChk(ierr); 1758 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1759 // Fallback to reference Ceed 1760 if (!op->op_fallback) { 1761 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1762 } 1763 // Assemble 1764 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 1765 CeedChk(ierr); 1766 return CEED_ERROR_SUCCESS; 1767 } 1768 } 1769 1770 // Interface implementation 1771 Ceed ceed, ceed_parent; 1772 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1773 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 1774 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 1775 CeedQFunction qf; 1776 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 1777 1778 // Determine active input basis 1779 bool interp = false, grad = false; 1780 CeedBasis basis = NULL; 1781 CeedElemRestriction rstr = NULL; 1782 CeedOperatorField *op_fields; 1783 CeedQFunctionField *qf_fields; 1784 CeedInt num_input_fields; 1785 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 1786 CeedChk(ierr); 1787 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 1788 for (CeedInt i=0; i<num_input_fields; i++) { 1789 CeedVector vec; 1790 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 1791 if (vec == CEED_VECTOR_ACTIVE) { 1792 CeedEvalMode eval_mode; 1793 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 1794 interp = interp || eval_mode == CEED_EVAL_INTERP; 1795 grad = grad || eval_mode == CEED_EVAL_GRAD; 1796 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 1797 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 1798 } 1799 } 1800 if (!basis) 1801 // LCOV_EXCL_START 1802 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 1803 // LCOV_EXCL_STOP 1804 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1, 1805 l_size = 1; 1806 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 1807 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 1808 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 1809 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 1810 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 1811 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 1812 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 1813 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 1814 1815 // Build and diagonalize 1D Mass and Laplacian 1816 bool 
tensor_basis; 1817 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 1818 if (!tensor_basis) 1819 // LCOV_EXCL_START 1820 return CeedError(ceed, CEED_ERROR_BACKEND, 1821 "FDMElementInverse only supported for tensor " 1822 "bases"); 1823 // LCOV_EXCL_STOP 1824 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 1825 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 1826 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 1827 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 1828 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 1829 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 1830 // -- Build matrices 1831 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 1832 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 1833 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 1834 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 1835 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 1836 mass, laplace); CeedChk(ierr); 1837 1838 // -- Diagonalize 1839 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 1840 CeedChk(ierr); 1841 ierr = CeedFree(&mass); CeedChk(ierr); 1842 ierr = CeedFree(&laplace); CeedChk(ierr); 1843 for (CeedInt i=0; i<P_1d; i++) 1844 for (CeedInt j=0; j<P_1d; j++) 1845 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 1846 ierr = CeedFree(&x); CeedChk(ierr); 1847 1848 // Assemble QFunction 1849 CeedVector assembled; 1850 CeedElemRestriction rstr_qf; 1851 ierr = CeedOperatorLinearAssembleQFunction(op, &assembled, &rstr_qf, request); 1852 CeedChk(ierr); 1853 CeedInt layout[3]; 1854 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 1855 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 1856 CeedScalar max_norm = 0; 1857 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 1858 1859 // Calculate element averages 1860 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 1861 CeedScalar *elem_avg; 1862 const 
CeedScalar *assembled_array, *q_weight_array; 1863 CeedVector q_weight; 1864 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 1865 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 1866 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 1867 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 1868 CeedChk(ierr); 1869 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 1870 CeedChk(ierr); 1871 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 1872 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 1873 for (CeedInt e=0; e<num_elem; e++) { 1874 CeedInt count = 0; 1875 for (CeedInt q=0; q<num_qpts; q++) 1876 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 1877 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 1878 qf_value_bound) { 1879 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 1880 q_weight_array[q]; 1881 count++; 1882 } 1883 if (count) { 1884 elem_avg[e] /= count; 1885 } else { 1886 elem_avg[e] = 1.0; 1887 } 1888 } 1889 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 1890 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 1891 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 1892 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 1893 1894 // Build FDM diagonal 1895 CeedVector q_data; 1896 CeedScalar *q_data_array, *fdm_diagonal; 1897 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 1898 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 1899 for (CeedInt c=0; c<num_comp; c++) 1900 for (CeedInt n=0; n<elem_size; n++) { 1901 if (interp) 1902 fdm_diagonal[c*elem_size + n] = 1.0; 1903 if (grad) 1904 for (CeedInt d=0; d<dim; d++) { 1905 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 1906 fdm_diagonal[c*elem_size + n] += lambda[i]; 1907 } 1908 if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound) 1909 
fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 1910 } 1911 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 1912 CeedChk(ierr); 1913 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 1914 ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr); 1915 for (CeedInt e=0; e<num_elem; e++) 1916 for (CeedInt c=0; c<num_comp; c++) 1917 for (CeedInt n=0; n<elem_size; n++) 1918 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 1919 fdm_diagonal[c*elem_size + n]); 1920 ierr = CeedFree(&elem_avg); CeedChk(ierr); 1921 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 1922 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 1923 1924 // Setup FDM operator 1925 // -- Basis 1926 CeedBasis fdm_basis; 1927 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 1928 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 1929 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 1930 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 1931 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 1932 fdm_interp, grad_dummy, q_ref_dummy, 1933 q_weight_dummy, &fdm_basis); CeedChk(ierr); 1934 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 1935 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 1936 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 1937 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 1938 ierr = CeedFree(&lambda); CeedChk(ierr); 1939 1940 // -- Restriction 1941 CeedElemRestriction rstr_qd_i; 1942 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 1943 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 1944 num_comp, num_elem*num_comp*elem_size, 1945 strides, &rstr_qd_i); CeedChk(ierr); 1946 // -- QFunction 1947 CeedQFunction qf_fdm; 1948 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 1949 CeedChk(ierr); 1950 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 1951 CeedChk(ierr); 1952 ierr = 
CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 1953 CeedChk(ierr); 1954 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 1955 CeedChk(ierr); 1956 // -- QFunction context 1957 CeedInt *num_comp_data; 1958 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 1959 num_comp_data[0] = num_comp; 1960 CeedQFunctionContext ctx_fdm; 1961 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 1962 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 1963 sizeof(*num_comp_data), num_comp_data); 1964 CeedChk(ierr); 1965 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 1966 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 1967 // -- Operator 1968 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 1969 CeedChk(ierr); 1970 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 1971 CeedChk(ierr); 1972 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 1973 q_data); CeedChk(ierr); 1974 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 1975 CeedChk(ierr); 1976 1977 // Cleanup 1978 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 1979 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 1980 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 1981 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 1982 1983 return CEED_ERROR_SUCCESS; 1984 } 1985 1986 /// @} 1987