1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->interface_setup = false; 75 op_ref->backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 op->op_fallback = op_ref; 79 80 // Clone QF 81 CeedQFunction qf_ref; 82 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 83 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 84 qf_ref->data = NULL; 85 qf_ref->ceed = ceed_ref; 86 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 87 op_ref->qf = qf_ref; 88 op->qf_fallback = qf_ref; 89 return CEED_ERROR_SUCCESS; 90 } 91 92 /** 93 @brief Select correct basis matrix pointer based on CeedEvalMode 94 95 @param[in] eval_mode Current basis evaluation mode 96 @param[in] identity Pointer to identity matrix 97 @param[in] interp Pointer to interpolation matrix 98 @param[in] grad Pointer to gradient matrix 99 @param[out] basis_ptr Basis pointer to set 100 101 @return none 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 148 *pointblock_offsets; 149 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 151 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 152 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 153 CeedInt shift = num_comp; 154 if (comp_stride != 1) 155 shift *= num_comp; 156 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 157 CeedChk(ierr); 158 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 159 pointblock_offsets[i] = offsets[i]*shift; 160 if (pointblock_offsets[i] > max) 161 max = pointblock_offsets[i]; 162 } 163 164 // Create new restriction 165 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 166 1, max + num_comp*num_comp, CEED_MEM_HOST, 167 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 168 CeedChk(ierr); 169 170 // Cleanup 171 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 172 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else 181 CEED_REQUEST_IMMEDIATE 182 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 183 @param[out] assembled CeedVector to store assembled diagonal 184 185 @return An error code: 0 - success, otherwise - failure 186 187 @ref Developer 188 **/ 189 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 190 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 191 int ierr; 192 Ceed ceed; 193 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 194 195 // Assemble QFunction 196 CeedQFunction qf; 197 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 198 CeedInt num_input_fields, num_output_fields; 199 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 200 CeedChk(ierr); 201 CeedVector assembled_qf; 202 CeedElemRestriction rstr; 203 ierr = CeedOperatorLinearAssembleQFunction(op, &assembled_qf, &rstr, request); 204 CeedChk(ierr); 205 CeedInt layout[3]; 206 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 207 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 208 CeedScalar max_norm = 0; 209 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 210 211 // Determine active input basis 212 CeedOperatorField *op_fields; 213 CeedQFunctionField *qf_fields; 214 ierr = CeedOperatorGetFields(op, &op_fields, NULL); CeedChk(ierr); 215 ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChk(ierr); 216 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 217 CeedEvalMode *eval_mode_in = NULL; 218 CeedBasis basis_in = NULL; 219 CeedElemRestriction rstr_in = NULL; 220 for (CeedInt i=0; i<num_input_fields; i++) { 221 CeedVector vec; 222 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 223 if (vec == CEED_VECTOR_ACTIVE) { 224 CeedElemRestriction rstr; 225 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 226 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 227 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 228 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 229 if (rstr_in && rstr_in != rstr) 230 // LCOV_EXCL_START 231 return CeedError(ceed, CEED_ERROR_BACKEND, 232 "Multi-field non-composite operator diagonal assembly not supported"); 233 // LCOV_EXCL_STOP 234 rstr_in = rstr; 235 CeedEvalMode eval_mode; 236 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 237 switch (eval_mode) { 238 case CEED_EVAL_NONE: 239 case CEED_EVAL_INTERP: 240 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 241 eval_mode_in[num_eval_mode_in] = eval_mode; 242 num_eval_mode_in += 1; 243 break; 244 case CEED_EVAL_GRAD: 245 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 246 for (CeedInt d=0; d<dim; d++) 247 eval_mode_in[num_eval_mode_in+d] = eval_mode; 248 num_eval_mode_in += dim; 249 break; 250 case CEED_EVAL_WEIGHT: 251 case CEED_EVAL_DIV: 252 case CEED_EVAL_CURL: 253 break; // Caught by QF Assembly 254 } 255 } 256 } 257 258 // Determine active output basis 259 ierr = CeedOperatorGetFields(op, NULL, &op_fields); CeedChk(ierr); 260 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); CeedChk(ierr); 261 CeedInt num_eval_mode_out = 0; 262 CeedEvalMode *eval_mode_out = NULL; 263 CeedBasis basis_out = NULL; 264 CeedElemRestriction rstr_out = NULL; 265 for (CeedInt i=0; i<num_output_fields; i++) { 266 CeedVector vec; 267 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 268 if (vec == CEED_VECTOR_ACTIVE) { 269 CeedElemRestriction rstr; 270 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 271 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 272 if (rstr_out && rstr_out != rstr) 273 // LCOV_EXCL_START 274 return CeedError(ceed, CEED_ERROR_BACKEND, 275 "Multi-field non-composite operator diagonal assembly not supported"); 276 // LCOV_EXCL_STOP 277 rstr_out = rstr; 278 CeedEvalMode eval_mode; 279 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 280 switch (eval_mode) { 281 case CEED_EVAL_NONE: 282 case CEED_EVAL_INTERP: 283 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 284 eval_mode_out[num_eval_mode_out] = eval_mode; 285 num_eval_mode_out += 1; 286 break; 287 case CEED_EVAL_GRAD: 288 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 289 for (CeedInt d=0; d<dim; d++) 290 eval_mode_out[num_eval_mode_out+d] = eval_mode; 291 num_eval_mode_out += dim; 292 break; 293 case CEED_EVAL_WEIGHT: 294 case CEED_EVAL_DIV: 295 case CEED_EVAL_CURL: 296 break; // Caught by QF Assembly 297 } 298 } 299 } 300 301 // Assemble point block diagonal restriction, if needed 302 CeedElemRestriction diag_rstr = rstr_out; 303 if (is_pointblock) { 304 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 305 CeedChk(ierr); 306 } 307 308 // Create diagonal vector 309 CeedVector elem_diag; 310 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 311 CeedChk(ierr); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array, *assembled_qf_array; 315 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 316 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 317 CeedChk(ierr); 318 ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 319 CeedChk(ierr); 320 CeedInt num_elem, num_nodes, num_qpts; 321 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 322 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 323 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 324 // Basis matrices 325 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 326 CeedScalar *identity = NULL; 327 bool evalNone = false; 328 for (CeedInt i=0; i<num_eval_mode_in; i++) 329 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 330 for (CeedInt i=0; i<num_eval_mode_out; i++) 331 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 332 if (evalNone) { 333 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 334 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 335 identity[i*num_nodes+i] = 1.0; 336 } 337 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 338 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 339 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 340 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 341 // Compute the diagonal of B^T D B 342 // Each element 343 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 344 for (CeedInt e=0; e<num_elem; e++) { 345 CeedInt d_out = -1; 346 // Each basis eval mode pair 347 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 348 const CeedScalar *bt = NULL; 349 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 350 d_out += 1; 351 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 352 &grad_out[d_out*num_qpts*num_nodes], &bt); 353 CeedInt d_in = -1; 354 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 355 const CeedScalar *b = NULL; 356 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 357 d_in += 1; 358 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 359 &grad_in[d_in*num_qpts*num_nodes], &b); 360 // Each component 361 for (CeedInt c_out=0; c_out<num_comp; c_out++) 362 // Each qpoint/node pair 363 for (CeedInt q=0; q<num_qpts; q++) 364 if (is_pointblock) { 365 // Point Block Diagonal 366 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 367 const CeedScalar qf_value = 368 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 369 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 370 if (fabs(qf_value) > qf_value_bound) 371 for (CeedInt n=0; n<num_nodes; n++) 372 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 373 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 374 } 375 } else { 376 // Diagonal Only 377 const CeedScalar qf_value = 378 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 379 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 380 if (fabs(qf_value) > qf_value_bound) 381 for (CeedInt n=0; n<num_nodes; n++) 382 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 383 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 384 } 385 } 386 } 387 } 388 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 389 ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array); CeedChk(ierr); 390 391 // Assemble local operator diagonal 392 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 393 assembled, request); CeedChk(ierr); 394 395 // Cleanup 396 if (is_pointblock) { 397 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 398 } 399 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 400 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 401 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 402 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 403 ierr = CeedFree(&identity); CeedChk(ierr); 404 405 return CEED_ERROR_SUCCESS; 406 } 407 408 /** 409 @brief Core logic for assembling composite operator diagonal 410 411 @param[in] op CeedOperator to assemble point block diagonal 412 @param[in] request Address of CeedRequest for non-blocking completion, else 413 CEED_REQUEST_IMMEDIATE 414 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 415 @param[out] assembled CeedVector to store assembled diagonal 416 417 @return An error code: 0 - success, otherwise - failure 418 419 @ref Developer 420 **/ 421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 422 CeedOperator op, CeedRequest *request, const bool is_pointblock, 423 CeedVector assembled) { 424 int ierr; 425 CeedInt num_sub; 426 CeedOperator *suboperators; 427 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 428 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 429 for (CeedInt i = 0; i < num_sub; i++) { 430 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 431 is_pointblock, assembled); CeedChk(ierr); 432 } 433 return CEED_ERROR_SUCCESS; 434 } 435 436 /** 437 @brief Build nonzero pattern for non-composite operator 438 439 Users should generally use CeedOperatorLinearAssembleSymbolic() 440 441 @param[in] op CeedOperator to assemble nonzero pattern 442 @param[in] offset Offset for number of entries 443 @param[out] rows Row number for each entry 444 @param[out] cols Column number for each entry 445 446 @return An error code: 0 - success, otherwise - failure 447 448 @ref Developer 449 **/ 450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 451 CeedInt *rows, CeedInt *cols) { 452 int ierr; 453 Ceed ceed = op->ceed; 454 if (op->composite) 455 // LCOV_EXCL_START 456 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 457 "Composite operator not supported"); 458 // LCOV_EXCL_STOP 459 460 CeedElemRestriction rstr_in; 461 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 462 CeedInt num_elem, elem_size, num_nodes, num_comp; 463 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 464 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 465 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 466 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 467 CeedInt layout_er[3]; 468 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 469 470 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 471 472 // Determine elem_dof relation 473 CeedVector index_vec; 474 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 475 CeedScalar *array; 476 ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 477 for (CeedInt i = 0; i < num_nodes; ++i) { 478 array[i] = i; 479 } 480 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 481 CeedVector elem_dof; 482 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 483 CeedChk(ierr); 484 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 485 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 486 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 487 const CeedScalar *elem_dof_a; 488 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 489 CeedChk(ierr); 490 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 491 492 // Determine i, j locations for element matrices 493 CeedInt count = 0; 494 for (int e = 0; e < num_elem; ++e) { 495 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 496 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 497 for (int i = 0; i < elem_size; ++i) { 498 for (int j = 0; j < elem_size; ++j) { 499 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 500 (comp_out)*layout_er[1] + e*layout_er[2]; 501 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 502 (comp_in)*layout_er[1] + e*layout_er[2]; 503 504 const CeedInt row = elem_dof_a[elem_dof_index_row]; 505 const CeedInt col = elem_dof_a[elem_dof_index_col]; 506 507 rows[offset + count] = row; 508 cols[offset + count] = col; 509 count++; 510 } 511 } 512 } 513 } 514 } 515 if (count != local_num_entries) 516 // LCOV_EXCL_START 517 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 518 // LCOV_EXCL_STOP 519 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 520 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 521 522 return CEED_ERROR_SUCCESS; 523 } 524 525 /** 526 @brief Assemble nonzero entries for non-composite operator 527 528 Users should generally use CeedOperatorLinearAssemble() 529 530 @param[in] op CeedOperator to assemble 531 @param[out] offset Offest for number of entries 532 @param[out] values Values to assemble into matrix 533 534 @return An error code: 0 - success, otherwise - failure 535 536 @ref Developer 537 **/ 538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 539 CeedVector values) { 540 int ierr; 541 Ceed ceed = op->ceed; 542 if (op->composite) 543 // LCOV_EXCL_START 544 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 545 "Composite operator not supported"); 546 // LCOV_EXCL_STOP 547 548 // Assemble QFunction 549 CeedQFunction qf; 550 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 551 CeedInt num_input_fields, num_output_fields; 552 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 553 CeedChk(ierr); 554 CeedVector assembled_qf; 555 CeedElemRestriction rstr_q; 556 ierr = CeedOperatorLinearAssembleQFunction( 557 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 558 559 CeedInt qf_length; 560 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 561 562 CeedOperatorField *input_fields; 563 CeedOperatorField *output_fields; 564 ierr = CeedOperatorGetFields(op, &input_fields, &output_fields); CeedChk(ierr); 565 566 // Determine active input basis 567 CeedQFunctionField *qf_fields; 568 ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChk(ierr); 569 CeedInt num_eval_mode_in = 0, dim = 1; 570 CeedEvalMode *eval_mode_in = NULL; 571 CeedBasis basis_in = NULL; 572 CeedElemRestriction rstr_in = NULL; 573 for (CeedInt i=0; i<num_input_fields; i++) { 574 CeedVector vec; 575 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 576 if (vec == CEED_VECTOR_ACTIVE) { 577 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 578 CeedChk(ierr); 579 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 580 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 581 CeedChk(ierr); 582 CeedEvalMode eval_mode; 583 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 584 CeedChk(ierr); 585 switch (eval_mode) { 586 case CEED_EVAL_NONE: 587 case CEED_EVAL_INTERP: 588 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 589 eval_mode_in[num_eval_mode_in] = eval_mode; 590 num_eval_mode_in += 1; 591 break; 592 case CEED_EVAL_GRAD: 593 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 594 for (CeedInt d=0; d<dim; d++) { 595 eval_mode_in[num_eval_mode_in+d] = eval_mode; 596 } 597 num_eval_mode_in += dim; 598 break; 599 case CEED_EVAL_WEIGHT: 600 case CEED_EVAL_DIV: 601 case CEED_EVAL_CURL: 602 break; // Caught by QF Assembly 603 } 604 } 605 } 606 607 // Determine active output basis 608 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); CeedChk(ierr); 609 CeedInt num_eval_mode_out = 0; 610 CeedEvalMode *eval_mode_out = NULL; 611 CeedBasis basis_out = NULL; 612 CeedElemRestriction rstr_out = NULL; 613 for (CeedInt i=0; i<num_output_fields; i++) { 614 CeedVector vec; 615 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 616 if (vec == CEED_VECTOR_ACTIVE) { 617 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 618 CeedChk(ierr); 619 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 620 CeedChk(ierr); 621 CeedEvalMode eval_mode; 622 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 623 CeedChk(ierr); 624 switch (eval_mode) { 625 case CEED_EVAL_NONE: 626 case CEED_EVAL_INTERP: 627 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 628 eval_mode_out[num_eval_mode_out] = eval_mode; 629 num_eval_mode_out += 1; 630 break; 631 case CEED_EVAL_GRAD: 632 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 633 for (CeedInt d=0; d<dim; d++) { 634 eval_mode_out[num_eval_mode_out+d] = eval_mode; 635 } 636 num_eval_mode_out += dim; 637 break; 638 case CEED_EVAL_WEIGHT: 639 case CEED_EVAL_DIV: 640 case CEED_EVAL_CURL: 641 break; // Caught by QF Assembly 642 } 643 } 644 } 645 646 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 647 // LCOV_EXCL_START 648 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 649 "Cannot assemble operator with out inputs/outputs"); 650 // LCOV_EXCL_STOP 651 652 CeedInt num_elem, elem_size, num_qpts, num_comp; 653 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 654 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 655 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 656 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 657 658 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 659 660 // loop over elements and put in data structure 661 const CeedScalar *interp_in, *grad_in; 662 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 663 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 664 665 const CeedScalar *assembled_qf_array; 666 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 667 CeedChk(ierr); 668 669 CeedInt layout_qf[3]; 670 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 671 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 672 673 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 674 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 675 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 676 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 677 num_qpts]; // logically 3-tensor 678 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 679 CeedScalar elem_mat[elem_size * elem_size]; 680 int count = 0; 681 CeedScalar *vals; 682 ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 683 for (int e = 0; e < num_elem; ++e) { 684 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 685 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 686 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 687 B_mat_in[ell] = 0.0; 688 } 689 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 690 B_mat_out[ell] = 0.0; 691 } 692 // Store block-diagonal D matrix as collection of small dense blocks 693 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 694 D_mat[ell] = 0.0; 695 } 696 // form element matrix itself (for each block component) 697 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 698 elem_mat[ell] = 0.0; 699 } 700 for (int q = 0; q < num_qpts; ++q) { 701 for (int n = 0; n < elem_size; ++n) { 702 CeedInt d_in = -1; 703 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 704 const int qq = num_eval_mode_in*q; 705 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 706 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 707 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 708 d_in += 1; 709 B_mat_in[(qq+e_in)*elem_size + n] += 710 grad_in[(d_in*num_qpts+q) * elem_size + n]; 711 } else { 712 // LCOV_EXCL_START 713 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 714 // LCOV_EXCL_STOP 715 } 716 } 717 CeedInt d_out = -1; 718 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 719 const int qq = num_eval_mode_out*q; 720 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 721 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 722 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 723 d_out += 1; 724 B_mat_out[(qq+e_out)*elem_size + n] += 725 grad_in[(d_out*num_qpts+q) * elem_size + n]; 726 } else { 727 // LCOV_EXCL_START 728 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 729 // LCOV_EXCL_STOP 730 } 731 } 732 } 733 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 734 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 735 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 736 +comp_out; 737 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 738 e*layout_qf[2]; 739 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 740 } 741 } 742 } 743 // Compute B^T*D 744 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 745 BTD[ell] = 0.0; 746 } 747 for (int j = 0; j<elem_size; ++j) { 748 for (int q = 0; q<num_qpts; ++q) { 749 int qq = num_eval_mode_out*q; 750 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 751 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 752 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 753 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 754 } 755 } 756 } 757 } 758 759 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 760 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 761 762 // put element matrix in coordinate data structure 763 for (int i = 0; i < elem_size; ++i) { 764 for (int j = 0; j < elem_size; ++j) { 765 vals[offset + count] = elem_mat[i*elem_size + j]; 766 count++; 767 } 768 } 769 } 770 } 771 } 772 if (count != local_num_entries) 773 // LCOV_EXCL_START 774 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 775 // LCOV_EXCL_STOP 776 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 777 778 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 779 CeedChk(ierr); 780 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 781 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 782 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 783 784 return CEED_ERROR_SUCCESS; 785 } 786 787 /** 788 @brief Count number of entries for assembled CeedOperator 789 790 @param[in] op CeedOperator to assemble 791 @param[out] num_entries Number of entries in assembled representation 792 793 @return An error code: 0 - success, otherwise - failure 794 795 @ref Utility 796 **/ 797 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 798 CeedInt *num_entries) { 799 int ierr; 800 CeedElemRestriction rstr; 801 CeedInt num_elem, elem_size, num_comp; 802 803 if (op->composite) 804 // LCOV_EXCL_START 805 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 806 "Composite operator not supported"); 807 // LCOV_EXCL_STOP 808 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 809 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 810 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 811 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 812 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 813 814 return CEED_ERROR_SUCCESS; 815 } 816 817 /** 818 @brief Common code for creating a multigrid coarse operator and level 819 transfer operators for a CeedOperator 820 821 @param[in] op_fine Fine grid operator 822 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 823 @param[in] rstr_coarse Coarse grid restriction 824 @param[in] basis_coarse Coarse grid active vector basis 825 @param[in] basis_c_to_f Basis for coarse to fine interpolation 826 @param[out] op_coarse Coarse grid operator 827 @param[out] op_prolong Coarse to fine operator 828 @param[out] op_restrict Fine to coarse operator 829 830 @return An error code: 0 - success, otherwise - failure 831 832 @ref Developer 833 **/ 834 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 835 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 836 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 837 CeedOperator *op_restrict) { 838 int ierr; 839 Ceed ceed; 840 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 841 842 // Check for composite operator 843 bool is_composite; 844 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 845 if (is_composite) 846 // LCOV_EXCL_START 847 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 848 "Automatic multigrid setup for composite operators not supported"); 849 // LCOV_EXCL_STOP 850 851 // Coarse Grid 852 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 853 op_coarse); CeedChk(ierr); 854 CeedElemRestriction rstr_fine = NULL; 855 // -- Clone input fields 856 for (int i = 0; i < op_fine->qf->num_input_fields; i++) { 857 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 858 rstr_fine = op_fine->input_fields[i]->elem_restr; 859 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 860 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 861 CeedChk(ierr); 862 } else { 863 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 864 op_fine->input_fields[i]->elem_restr, 865 op_fine->input_fields[i]->basis, 866 op_fine->input_fields[i]->vec); CeedChk(ierr); 867 } 868 } 869 // -- Clone output fields 870 for (int i = 0; i < op_fine->qf->num_output_fields; i++) { 871 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 872 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 873 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 874 CeedChk(ierr); 875 } else { 876 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 877 op_fine->output_fields[i]->elem_restr, 878 op_fine->output_fields[i]->basis, 879 op_fine->output_fields[i]->vec); CeedChk(ierr); 880 } 881 } 882 883 // Multiplicity vector 884 CeedVector mult_vec, mult_e_vec; 885 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 886 CeedChk(ierr); 887 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 888 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 889 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 890 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 891 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 892 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 893 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 894 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 895 896 // Restriction 897 CeedInt num_comp; 898 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr); 899 CeedQFunction qf_restrict; 900 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 901 CeedChk(ierr); 902 CeedInt *num_comp_r_data; 903 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 904 num_comp_r_data[0] = num_comp; 905 CeedQFunctionContext ctx_r; 906 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 907 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 908 sizeof(*num_comp_r_data), num_comp_r_data); 909 CeedChk(ierr); 910 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 911 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 912 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 913 CeedChk(ierr); 914 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 915 CeedChk(ierr); 916 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 917 CEED_EVAL_INTERP); CeedChk(ierr); 918 919 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 920 CEED_QFUNCTION_NONE, op_restrict); 921 CeedChk(ierr); 922 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 923 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 924 CeedChk(ierr); 925 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 926 CEED_BASIS_COLLOCATED, mult_vec); 927 CeedChk(ierr); 928 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 929 CEED_VECTOR_ACTIVE); CeedChk(ierr); 930 931 // Prolongation 932 CeedQFunction qf_prolong; 933 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 934 CeedChk(ierr); 935 CeedInt *num_comp_p_data; 936 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 937 num_comp_p_data[0] = num_comp; 938 CeedQFunctionContext ctx_p; 939 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 940 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 941 sizeof(*num_comp_p_data), num_comp_p_data); 942 CeedChk(ierr); 943 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 944 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 945 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 946 CeedChk(ierr); 947 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 948 CeedChk(ierr); 949 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 950 CeedChk(ierr); 951 952 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 953 CEED_QFUNCTION_NONE, op_prolong); 954 CeedChk(ierr); 955 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 956 CEED_VECTOR_ACTIVE); CeedChk(ierr); 957 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 958 CEED_BASIS_COLLOCATED, mult_vec); 959 CeedChk(ierr); 960 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 961 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 962 CeedChk(ierr); 963 964 // Cleanup 965 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 966 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 967 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 968 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 969 return CEED_ERROR_SUCCESS; 970 } 971 972 /** 973 @brief Build 1D mass matrix and Laplacian with perturbation 974 975 @param[in] interp_1d Interpolation matrix in one dimension 976 @param[in] grad_1d Gradient matrix in one dimension 977 @param[in] q_weight_1d Quadrature weights in one dimension 978 @param[in] P_1d Number of basis nodes in one dimension 979 @param[in] Q_1d Number of quadrature points in one dimension 980 @param[in] dim Dimension of basis 981 @param[out] mass Assembled mass matrix in one dimension 982 @param[out] laplace Assembled perturbed Laplacian in one dimension 983 984 @return An error code: 0 - success, otherwise - failure 985 986 @ref Developer 987 **/ 988 CeedPragmaOptimizeOff 989 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 990 const CeedScalar *grad_1d, 991 const CeedScalar *q_weight_1d, CeedInt P_1d, 992 CeedInt Q_1d, CeedInt dim, 993 CeedScalar *mass, CeedScalar *laplace) { 994 995 for (CeedInt i=0; i<P_1d; i++) 996 for (CeedInt j=0; j<P_1d; j++) { 997 CeedScalar sum = 0.0; 998 for (CeedInt k=0; k<Q_1d; k++) 999 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 1000 mass[i+j*P_1d] = sum; 1001 } 1002 // -- Laplacian 1003 for (CeedInt i=0; i<P_1d; i++) 1004 for (CeedInt j=0; j<P_1d; j++) { 1005 CeedScalar sum = 0.0; 1006 for (CeedInt k=0; k<Q_1d; k++) 1007 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1008 laplace[i+j*P_1d] = sum; 1009 } 1010 CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4; 1011 for (CeedInt i=0; i<P_1d; i++) 1012 laplace[i+P_1d*i] += perturbation; 1013 return CEED_ERROR_SUCCESS; 1014 } 1015 CeedPragmaOptimizeOn 1016 1017 /// @} 1018 1019 /// ---------------------------------------------------------------------------- 1020 /// CeedOperator Public API 1021 /// ---------------------------------------------------------------------------- 1022 /// @addtogroup CeedOperatorUser 1023 /// @{ 1024 1025 /** 1026 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1027 1028 This returns a CeedVector containing a matrix at each quadrature point 1029 providing the action of the CeedQFunction associated with the CeedOperator. 1030 The vector 'assembled' is of shape 1031 [num_elements, num_input_fields, num_output_fields, num_quad_points] 1032 and contains column-major matrices representing the action of the 1033 CeedQFunction for a corresponding quadrature point on an element. Inputs and 1034 outputs are in the order provided by the user when adding CeedOperator fields. 1035 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1036 'v', provided in that order, would result in an assembled QFunction that 1037 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1038 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1039 1040 @param op CeedOperator to assemble CeedQFunction 1041 @param[out] assembled CeedVector to store assembled CeedQFunction at 1042 quadrature points 1043 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1044 CeedQFunction 1045 @param request Address of CeedRequest for non-blocking completion, else 1046 @ref CEED_REQUEST_IMMEDIATE 1047 1048 @return An error code: 0 - success, otherwise - failure 1049 1050 @ref User 1051 **/ 1052 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1053 CeedElemRestriction *rstr, 1054 CeedRequest *request) { 1055 int ierr; 1056 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1057 1058 // Backend version 1059 if (op->LinearAssembleQFunction) { 1060 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); CeedChk(ierr); 1061 return CEED_ERROR_SUCCESS; 1062 } else { 1063 // Fallback to reference Ceed 1064 if (!op->op_fallback) { 1065 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1066 } 1067 // Assemble 1068 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1069 rstr, request); CeedChk(ierr); 1070 return CEED_ERROR_SUCCESS; 1071 } 1072 } 1073 1074 /** 1075 @brief Assemble the diagonal of a square linear CeedOperator 1076 1077 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1078 1079 Note: Currently only non-composite CeedOperators with a single field and 1080 composite CeedOperators with single field sub-operators are supported. 1081 1082 @param op CeedOperator to assemble CeedQFunction 1083 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1084 @param request Address of CeedRequest for non-blocking completion, else 1085 @ref CEED_REQUEST_IMMEDIATE 1086 1087 @return An error code: 0 - success, otherwise - failure 1088 1089 @ref User 1090 **/ 1091 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, 1092 CeedRequest *request) { 1093 int ierr; 1094 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1095 1096 // Use backend version, if available 1097 if (op->LinearAssembleDiagonal) { 1098 ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); 1099 return CEED_ERROR_SUCCESS; 1100 } else if (op->LinearAssembleAddDiagonal) { 1101 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1102 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1103 return CEED_ERROR_SUCCESS; 1104 } else { 1105 // Check for valid fallback resource 1106 const char *resource, *fallback_resource; 1107 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1108 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1109 CeedChk(ierr); 1110 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1111 // Fallback to reference Ceed 1112 if (!op->op_fallback) { 1113 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1114 } 1115 // Assemble 1116 ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request); 1117 CeedChk(ierr); 1118 return CEED_ERROR_SUCCESS; 1119 } 1120 } 1121 1122 // Default interface implementation 1123 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1124 ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); 1125 CeedChk(ierr); 1126 return CEED_ERROR_SUCCESS; 1127 } 1128 1129 /** 1130 @brief Assemble the diagonal of a square linear CeedOperator 1131 1132 This sums into a CeedVector the diagonal of a linear CeedOperator. 1133 1134 Note: Currently only non-composite CeedOperators with a single field and 1135 composite CeedOperators with single field sub-operators are supported. 1136 1137 @param op CeedOperator to assemble CeedQFunction 1138 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1139 @param request Address of CeedRequest for non-blocking completion, else 1140 @ref CEED_REQUEST_IMMEDIATE 1141 1142 @return An error code: 0 - success, otherwise - failure 1143 1144 @ref User 1145 **/ 1146 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1147 CeedRequest *request) { 1148 int ierr; 1149 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1150 1151 // Use backend version, if available 1152 if (op->LinearAssembleAddDiagonal) { 1153 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1154 return CEED_ERROR_SUCCESS; 1155 } else { 1156 // Check for valid fallback resource 1157 const char *resource, *fallback_resource; 1158 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1159 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1160 CeedChk(ierr); 1161 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1162 // Fallback to reference Ceed 1163 if (!op->op_fallback) { 1164 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1165 } 1166 // Assemble 1167 ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, 1168 request); CeedChk(ierr); 1169 return CEED_ERROR_SUCCESS; 1170 } 1171 } 1172 1173 // Default interface implementation 1174 bool is_composite; 1175 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1176 if (is_composite) { 1177 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1178 false, assembled); CeedChk(ierr); 1179 return CEED_ERROR_SUCCESS; 1180 } else { 1181 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1182 CeedChk(ierr); 1183 return CEED_ERROR_SUCCESS; 1184 } 1185 } 1186 1187 /** 1188 @brief Assemble the point block diagonal of a square linear CeedOperator 1189 1190 This overwrites a CeedVector with the point block diagonal of a linear 1191 CeedOperator. 1192 1193 Note: Currently only non-composite CeedOperators with a single field and 1194 composite CeedOperators with single field sub-operators are supported. 1195 1196 @param op CeedOperator to assemble CeedQFunction 1197 @param[out] assembled CeedVector to store assembled CeedOperator point block 1198 diagonal, provided in row-major form with an 1199 @a num_comp * @a num_comp block at each node. The dimensions 1200 of this vector are derived from the active vector 1201 for the CeedOperator. The array has shape 1202 [nodes, component out, component in]. 1203 @param request Address of CeedRequest for non-blocking completion, else 1204 CEED_REQUEST_IMMEDIATE 1205 1206 @return An error code: 0 - success, otherwise - failure 1207 1208 @ref User 1209 **/ 1210 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, 1211 CeedVector assembled, CeedRequest *request) { 1212 int ierr; 1213 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1214 1215 // Use backend version, if available 1216 if (op->LinearAssemblePointBlockDiagonal) { 1217 ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); 1218 CeedChk(ierr); 1219 return CEED_ERROR_SUCCESS; 1220 } else if (op->LinearAssembleAddPointBlockDiagonal) { 1221 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1222 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, 1223 request); CeedChk(ierr); 1224 return CEED_ERROR_SUCCESS; 1225 } else { 1226 // Check for valid fallback resource 1227 const char *resource, *fallback_resource; 1228 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1229 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1230 CeedChk(ierr); 1231 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1232 // Fallback to reference Ceed 1233 if (!op->op_fallback) { 1234 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1235 } 1236 // Assemble 1237 ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback, 1238 assembled, request); CeedChk(ierr); 1239 return CEED_ERROR_SUCCESS; 1240 } 1241 } 1242 1243 // Default interface implementation 1244 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1245 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); 1246 CeedChk(ierr); 1247 return CEED_ERROR_SUCCESS; 1248 } 1249 1250 /** 1251 @brief Assemble the point block diagonal of a square linear CeedOperator 1252 1253 This sums into a CeedVector with the point block diagonal of a linear 1254 CeedOperator. 1255 1256 Note: Currently only non-composite CeedOperators with a single field and 1257 composite CeedOperators with single field sub-operators are supported. 1258 1259 @param op CeedOperator to assemble CeedQFunction 1260 @param[out] assembled CeedVector to store assembled CeedOperator point block 1261 diagonal, provided in row-major form with an 1262 @a num_comp * @a num_comp block at each node. The dimensions 1263 of this vector are derived from the active vector 1264 for the CeedOperator. The array has shape 1265 [nodes, component out, component in]. 1266 @param request Address of CeedRequest for non-blocking completion, else 1267 CEED_REQUEST_IMMEDIATE 1268 1269 @return An error code: 0 - success, otherwise - failure 1270 1271 @ref User 1272 **/ 1273 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, 1274 CeedVector assembled, CeedRequest *request) { 1275 int ierr; 1276 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1277 1278 // Use backend version, if available 1279 if (op->LinearAssembleAddPointBlockDiagonal) { 1280 ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); 1281 CeedChk(ierr); 1282 return CEED_ERROR_SUCCESS; 1283 } else { 1284 // Check for valid fallback resource 1285 const char *resource, *fallback_resource; 1286 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1287 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1288 CeedChk(ierr); 1289 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1290 // Fallback to reference Ceed 1291 if (!op->op_fallback) { 1292 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1293 } 1294 // Assemble 1295 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1296 assembled, request); CeedChk(ierr); 1297 return CEED_ERROR_SUCCESS; 1298 } 1299 } 1300 1301 // Default interface implemenation 1302 bool is_composite; 1303 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1304 if (is_composite) { 1305 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1306 true, assembled); CeedChk(ierr); 1307 return CEED_ERROR_SUCCESS; 1308 } else { 1309 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1310 CeedChk(ierr); 1311 return CEED_ERROR_SUCCESS; 1312 } 1313 } 1314 1315 /** 1316 @brief Fully assemble the nonzero pattern of a linear operator. 1317 1318 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1319 1320 The assembly routines use coordinate format, with num_entries tuples of the 1321 form (i, j, value) which indicate that value should be added to the matrix 1322 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1323 This function returns the number of entries and their (i, j) locations, 1324 while CeedOperatorLinearAssemble() provides the values in the same 1325 ordering. 1326 1327 This will generally be slow unless your operator is low-order. 1328 1329 @param[in] op CeedOperator to assemble 1330 @param[out] num_entries Number of entries in coordinate nonzero pattern 1331 @param[out] rows Row number for each entry 1332 @param[out] cols Column number for each entry 1333 1334 @ref User 1335 **/ 1336 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries, 1337 CeedInt **rows, CeedInt **cols) { 1338 int ierr; 1339 CeedInt num_suboperators, single_entries; 1340 CeedOperator *sub_operators; 1341 bool is_composite; 1342 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1343 1344 // Use backend version, if available 1345 if (op->LinearAssembleSymbolic) { 1346 ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr); 1347 return CEED_ERROR_SUCCESS; 1348 } else { 1349 // Check for valid fallback resource 1350 const char *resource, *fallback_resource; 1351 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1352 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1353 CeedChk(ierr); 1354 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1355 // Fallback to reference Ceed 1356 if (!op->op_fallback) { 1357 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1358 } 1359 // Assemble 1360 ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows, 1361 cols); CeedChk(ierr); 1362 return CEED_ERROR_SUCCESS; 1363 } 1364 } 1365 1366 // Default interface implementation 1367 1368 // count entries and allocate rows, cols arrays 1369 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1370 *num_entries = 0; 1371 if (is_composite) { 1372 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1373 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1374 for (int k = 0; k < num_suboperators; ++k) { 1375 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1376 &single_entries); CeedChk(ierr); 1377 *num_entries += single_entries; 1378 } 1379 } else { 1380 ierr = CeedSingleOperatorAssemblyCountEntries(op, 1381 &single_entries); CeedChk(ierr); 1382 *num_entries += single_entries; 1383 } 1384 ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr); 1385 ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr); 1386 1387 // assemble nonzero locations 1388 CeedInt offset = 0; 1389 if (is_composite) { 1390 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1391 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1392 for (int k = 0; k < num_suboperators; ++k) { 1393 ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, 1394 *cols); CeedChk(ierr); 1395 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1396 &single_entries); 1397 CeedChk(ierr); 1398 offset += single_entries; 1399 } 1400 } else { 1401 ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols); 1402 CeedChk(ierr); 1403 } 1404 1405 return CEED_ERROR_SUCCESS; 1406 } 1407 1408 /** 1409 @brief Fully assemble the nonzero entries of a linear operator. 1410 1411 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic() 1412 1413 The assembly routines use coordinate format, with num_entries tuples of the 1414 form (i, j, value) which indicate that value should be added to the matrix 1415 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1416 This function returns the values of the nonzero entries to be added, their 1417 (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1418 1419 This will generally be slow unless your operator is low-order. 1420 1421 @param[in] op CeedOperator to assemble 1422 @param[out] values Values to assemble into matrix 1423 1424 @ref User 1425 **/ 1426 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1427 int ierr; 1428 CeedInt num_suboperators, single_entries = 0; 1429 CeedOperator *sub_operators; 1430 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1431 1432 // Use backend version, if available 1433 if (op->LinearAssemble) { 1434 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1435 return CEED_ERROR_SUCCESS; 1436 } else { 1437 // Check for valid fallback resource 1438 const char *resource, *fallback_resource; 1439 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1440 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1441 CeedChk(ierr); 1442 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1443 // Fallback to reference Ceed 1444 if (!op->op_fallback) { 1445 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1446 } 1447 // Assemble 1448 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1449 return CEED_ERROR_SUCCESS; 1450 } 1451 } 1452 1453 // Default interface implementation 1454 bool is_composite; 1455 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1456 1457 CeedInt offset = 0; 1458 if (is_composite) { 1459 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1460 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1461 for (int k = 0; k < num_suboperators; ++k) { 1462 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1463 CeedChk(ierr); 1464 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1465 &single_entries); 1466 CeedChk(ierr); 1467 offset += single_entries; 1468 } 1469 } else { 1470 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1471 } 1472 1473 return CEED_ERROR_SUCCESS; 1474 } 1475 1476 /** 1477 @brief Create a multigrid coarse operator and level transfer operators 1478 for a CeedOperator, creating the prolongation basis from the 1479 fine and coarse grid interpolation 1480 1481 @param[in] op_fine Fine grid operator 1482 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1483 @param[in] rstr_coarse Coarse grid restriction 1484 @param[in] basis_coarse Coarse grid active vector basis 1485 @param[out] op_coarse Coarse grid operator 1486 @param[out] op_prolong Coarse to fine operator 1487 @param[out] op_restrict Fine to coarse operator 1488 1489 @return An error code: 0 - success, otherwise - failure 1490 1491 @ref User 1492 **/ 1493 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1494 CeedVector p_mult_fine, 1495 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1496 CeedOperator *op_coarse, CeedOperator *op_prolong, 1497 CeedOperator *op_restrict) { 1498 int ierr; 1499 Ceed ceed; 1500 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1501 1502 // Check for compatible quadrature spaces 1503 CeedBasis basis_fine; 1504 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1505 CeedInt Q_f, Q_c; 1506 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1507 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1508 if (Q_f != Q_c) 1509 // LCOV_EXCL_START 1510 return CeedError(ceed, CEED_ERROR_DIMENSION, 1511 "Bases must have compatible quadrature spaces"); 1512 // LCOV_EXCL_STOP 1513 1514 // Coarse to fine basis 1515 CeedInt P_f, P_c, Q = Q_f; 1516 bool is_tensor_f, is_tensor_c; 1517 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1518 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1519 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1520 if (is_tensor_f && is_tensor_c) { 1521 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1522 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1523 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1524 } else if (!is_tensor_f && !is_tensor_c) { 1525 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1526 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1527 } else { 1528 // LCOV_EXCL_START 1529 return CeedError(ceed, CEED_ERROR_MINOR, 1530 "Bases must both be tensor or non-tensor"); 1531 // LCOV_EXCL_STOP 1532 } 1533 1534 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1535 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1536 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1537 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1538 if (is_tensor_f) { 1539 memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]); 1540 memcpy(interp_c, basis_coarse->interp_1d, 1541 Q*P_c*sizeof basis_coarse->interp_1d[0]); 1542 } else { 1543 memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]); 1544 memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]); 1545 } 1546 1547 // -- QR Factorization, interp_f = Q R 1548 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1549 1550 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1551 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1552 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1553 1554 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1555 for (CeedInt j=0; j<P_c; j++) { // Column j 1556 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1557 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1558 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1559 for (CeedInt k=i+1; k<P_f; k++) 1560 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1561 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1562 } 1563 } 1564 ierr = CeedFree(&tau); CeedChk(ierr); 1565 ierr = CeedFree(&interp_c); CeedChk(ierr); 1566 ierr = CeedFree(&interp_f); CeedChk(ierr); 1567 1568 // Complete with interp_c_to_f versions of code 1569 if (is_tensor_f) { 1570 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1571 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1572 CeedChk(ierr); 1573 } else { 1574 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1575 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1576 CeedChk(ierr); 1577 } 1578 1579 // Cleanup 1580 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1581 return CEED_ERROR_SUCCESS; 1582 } 1583 1584 /** 1585 @brief Create a multigrid coarse operator and level transfer operators 1586 for a CeedOperator with a tensor basis for the active basis 1587 1588 @param[in] op_fine Fine grid operator 1589 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1590 @param[in] rstr_coarse Coarse grid restriction 1591 @param[in] basis_coarse Coarse grid active vector basis 1592 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1593 @param[out] op_coarse Coarse grid operator 1594 @param[out] op_prolong Coarse to fine operator 1595 @param[out] op_restrict Fine to coarse operator 1596 1597 @return An error code: 0 - success, otherwise - failure 1598 1599 @ref User 1600 **/ 1601 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1602 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1603 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1604 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1605 int ierr; 1606 Ceed ceed; 1607 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1608 1609 // Check for compatible quadrature spaces 1610 CeedBasis basis_fine; 1611 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1612 CeedInt Q_f, Q_c; 1613 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1614 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1615 if (Q_f != Q_c) 1616 // LCOV_EXCL_START 1617 return CeedError(ceed, CEED_ERROR_DIMENSION, 1618 "Bases must have compatible quadrature spaces"); 1619 // LCOV_EXCL_STOP 1620 1621 // Coarse to fine basis 1622 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1623 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1624 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1625 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1626 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1627 CeedChk(ierr); 1628 P_1d_c = dim == 1 ? num_nodes_c : 1629 dim == 2 ? sqrt(num_nodes_c) : 1630 cbrt(num_nodes_c); 1631 CeedScalar *q_ref, *q_weight, *grad; 1632 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1633 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1634 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1635 CeedBasis basis_c_to_f; 1636 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1637 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1638 CeedChk(ierr); 1639 ierr = CeedFree(&q_ref); CeedChk(ierr); 1640 ierr = CeedFree(&q_weight); CeedChk(ierr); 1641 ierr = CeedFree(&grad); CeedChk(ierr); 1642 1643 // Core code 1644 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1645 basis_coarse, basis_c_to_f, op_coarse, 1646 op_prolong, op_restrict); 1647 CeedChk(ierr); 1648 return CEED_ERROR_SUCCESS; 1649 } 1650 1651 /** 1652 @brief Create a multigrid coarse operator and level transfer operators 1653 for a CeedOperator with a non-tensor basis for the active vector 1654 1655 @param[in] op_fine Fine grid operator 1656 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1657 @param[in] rstr_coarse Coarse grid restriction 1658 @param[in] basis_coarse Coarse grid active vector basis 1659 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1660 @param[out] op_coarse Coarse grid operator 1661 @param[out] op_prolong Coarse to fine operator 1662 @param[out] op_restrict Fine to coarse operator 1663 1664 @return An error code: 0 - success, otherwise - failure 1665 1666 @ref User 1667 **/ 1668 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1669 CeedVector p_mult_fine, 1670 CeedElemRestriction rstr_coarse, 1671 CeedBasis basis_coarse, 1672 const CeedScalar *interp_c_to_f, 1673 CeedOperator *op_coarse, 1674 CeedOperator *op_prolong, 1675 CeedOperator *op_restrict) { 1676 int ierr; 1677 Ceed ceed; 1678 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1679 1680 // Check for compatible quadrature spaces 1681 CeedBasis basis_fine; 1682 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1683 CeedInt Q_f, Q_c; 1684 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1685 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1686 if (Q_f != Q_c) 1687 // LCOV_EXCL_START 1688 return CeedError(ceed, CEED_ERROR_DIMENSION, 1689 "Bases must have compatible quadrature spaces"); 1690 // LCOV_EXCL_STOP 1691 1692 // Coarse to fine basis 1693 CeedElemTopology topo; 1694 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1695 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1696 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1697 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1698 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 1699 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1700 CeedChk(ierr); 1701 CeedScalar *q_ref, *q_weight, *grad; 1702 ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr); 1703 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 1704 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 1705 CeedBasis basis_c_to_f; 1706 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 1707 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1708 CeedChk(ierr); 1709 ierr = CeedFree(&q_ref); CeedChk(ierr); 1710 ierr = CeedFree(&q_weight); CeedChk(ierr); 1711 ierr = CeedFree(&grad); CeedChk(ierr); 1712 1713 // Core code 1714 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1715 basis_coarse, basis_c_to_f, op_coarse, 1716 op_prolong, op_restrict); 1717 CeedChk(ierr); 1718 return CEED_ERROR_SUCCESS; 1719 } 1720 1721 /** 1722 @brief Build a FDM based approximate inverse for each element for a 1723 CeedOperator 1724 1725 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 1726 Method based approximate inverse. This function obtains the simultaneous 1727 diagonalization for the 1D mass and Laplacian operators, 1728 M = V^T V, K = V^T S V. 1729 The assembled QFunction is used to modify the eigenvalues from simultaneous 1730 diagonalization and obtain an approximate inverse of the form 1731 V^T S^hat V. The CeedOperator must be linear and non-composite. The 1732 associated CeedQFunction must therefore also be linear. 1733 1734 @param op CeedOperator to create element inverses 1735 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 1736 for each element 1737 @param request Address of CeedRequest for non-blocking completion, else 1738 @ref CEED_REQUEST_IMMEDIATE 1739 1740 @return An error code: 0 - success, otherwise - failure 1741 1742 @ref Backend 1743 **/ 1744 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 1745 CeedRequest *request) { 1746 int ierr; 1747 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1748 1749 // Use backend version, if available 1750 if (op->CreateFDMElementInverse) { 1751 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 1752 return CEED_ERROR_SUCCESS; 1753 } else { 1754 // Check for valid fallback resource 1755 const char *resource, *fallback_resource; 1756 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1757 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1758 CeedChk(ierr); 1759 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1760 // Fallback to reference Ceed 1761 if (!op->op_fallback) { 1762 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1763 } 1764 // Assemble 1765 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 1766 CeedChk(ierr); 1767 return CEED_ERROR_SUCCESS; 1768 } 1769 } 1770 1771 // Interface implementation 1772 Ceed ceed, ceed_parent; 1773 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 1774 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 1775 ceed_parent = ceed_parent ? ceed_parent : ceed; 1776 CeedQFunction qf; 1777 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 1778 1779 // Determine active input basis 1780 bool interp = false, grad = false; 1781 CeedBasis basis = NULL; 1782 CeedElemRestriction rstr = NULL; 1783 CeedOperatorField *op_fields; 1784 CeedQFunctionField *qf_fields; 1785 ierr = CeedOperatorGetFields(op, &op_fields, NULL); CeedChk(ierr); 1786 ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChk(ierr); 1787 CeedInt num_input_fields; 1788 ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, NULL); CeedChk(ierr); 1789 for (CeedInt i=0; i<num_input_fields; i++) { 1790 CeedVector vec; 1791 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 1792 if (vec == CEED_VECTOR_ACTIVE) { 1793 CeedEvalMode eval_mode; 1794 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 1795 interp = interp || eval_mode == CEED_EVAL_INTERP; 1796 grad = grad || eval_mode == CEED_EVAL_GRAD; 1797 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 1798 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 1799 } 1800 } 1801 if (!basis) 1802 // LCOV_EXCL_START 1803 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 1804 // LCOV_EXCL_STOP 1805 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1, 1806 l_size = 1; 1807 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 1808 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 1809 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 1810 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 1811 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 1812 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 1813 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 1814 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 1815 1816 // Build and diagonalize 1D Mass and Laplacian 1817 bool tensor_basis; 1818 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 1819 if (!tensor_basis) 1820 // LCOV_EXCL_START 1821 return CeedError(ceed, CEED_ERROR_BACKEND, 1822 "FDMElementInverse only supported for tensor " 1823 "bases"); 1824 // LCOV_EXCL_STOP 1825 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 1826 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 1827 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 1828 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 1829 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 1830 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 1831 // -- Build matrices 1832 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 1833 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 1834 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 1835 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 1836 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 1837 mass, laplace); CeedChk(ierr); 1838 1839 // -- Diagonalize 1840 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 1841 CeedChk(ierr); 1842 ierr = CeedFree(&mass); CeedChk(ierr); 1843 ierr = CeedFree(&laplace); CeedChk(ierr); 1844 for (CeedInt i=0; i<P_1d; i++) 1845 for (CeedInt j=0; j<P_1d; j++) 1846 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 1847 ierr = CeedFree(&x); CeedChk(ierr); 1848 1849 // Assemble QFunction 1850 CeedVector assembled; 1851 CeedElemRestriction rstr_qf; 1852 ierr = CeedOperatorLinearAssembleQFunction(op, &assembled, &rstr_qf, request); 1853 CeedChk(ierr); 1854 CeedInt layout[3]; 1855 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 1856 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 1857 CeedScalar max_norm = 0; 1858 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 1859 1860 // Calculate element averages 1861 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 1862 CeedScalar *elem_avg; 1863 const CeedScalar *assembled_array, *q_weight_array; 1864 CeedVector q_weight; 1865 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 1866 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 1867 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 1868 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 1869 CeedChk(ierr); 1870 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 1871 CeedChk(ierr); 1872 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 1873 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 1874 for (CeedInt e=0; e<num_elem; e++) { 1875 CeedInt count = 0; 1876 for (CeedInt q=0; q<num_qpts; q++) 1877 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 1878 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 1879 qf_value_bound) { 1880 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 1881 q_weight_array[q]; 1882 count++; 1883 } 1884 if (count) { 1885 elem_avg[e] /= count; 1886 } else { 1887 elem_avg[e] = 1.0; 1888 } 1889 } 1890 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 1891 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 1892 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 1893 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 1894 1895 // Build FDM diagonal 1896 CeedVector q_data; 1897 CeedScalar *q_data_array, *fdm_diagonal; 1898 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 1899 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 1900 for (CeedInt c=0; c<num_comp; c++) 1901 for (CeedInt n=0; n<elem_size; n++) { 1902 if (interp) 1903 fdm_diagonal[c*elem_size + n] = 1.0; 1904 if (grad) 1905 for (CeedInt d=0; d<dim; d++) { 1906 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 1907 fdm_diagonal[c*elem_size + n] += lambda[i]; 1908 } 1909 if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound) 1910 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 1911 } 1912 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 1913 CeedChk(ierr); 1914 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 1915 ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr); 1916 for (CeedInt e=0; e<num_elem; e++) 1917 for (CeedInt c=0; c<num_comp; c++) 1918 for (CeedInt n=0; n<elem_size; n++) 1919 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 1920 fdm_diagonal[c*elem_size + n]); 1921 ierr = CeedFree(&elem_avg); CeedChk(ierr); 1922 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 1923 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 1924 1925 // Setup FDM operator 1926 // -- Basis 1927 CeedBasis fdm_basis; 1928 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 1929 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 1930 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 1931 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 1932 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 1933 fdm_interp, grad_dummy, q_ref_dummy, 1934 q_weight_dummy, &fdm_basis); CeedChk(ierr); 1935 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 1936 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 1937 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 1938 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 1939 ierr = CeedFree(&lambda); CeedChk(ierr); 1940 1941 // -- Restriction 1942 CeedElemRestriction rstr_qd_i; 1943 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 1944 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 1945 num_comp, num_elem*num_comp*elem_size, 1946 strides, &rstr_qd_i); CeedChk(ierr); 1947 // -- QFunction 1948 CeedQFunction qf_fdm; 1949 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 1950 CeedChk(ierr); 1951 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 1952 CeedChk(ierr); 1953 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 1954 CeedChk(ierr); 1955 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 1956 CeedChk(ierr); 1957 // -- QFunction context 1958 CeedInt *num_comp_data; 1959 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 1960 num_comp_data[0] = num_comp; 1961 CeedQFunctionContext ctx_fdm; 1962 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 1963 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 1964 sizeof(*num_comp_data), num_comp_data); 1965 CeedChk(ierr); 1966 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 1967 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 1968 // -- Operator 1969 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 1970 CeedChk(ierr); 1971 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 1972 CeedChk(ierr); 1973 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 1974 q_data); CeedChk(ierr); 1975 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 1976 CeedChk(ierr); 1977 1978 // Cleanup 1979 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 1980 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 1981 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 1982 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 1983 1984 return CEED_ERROR_SUCCESS; 1985 } 1986 1987 /// @} 1988