1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 ierr = 
CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 79 &op_ref->qf_assembled); CeedChk(ierr); 80 op->op_fallback = op_ref; 81 82 // Clone QF 83 CeedQFunction qf_ref; 84 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 85 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 86 qf_ref->data = NULL; 87 qf_ref->ceed = ceed_ref; 88 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 89 op_ref->qf = qf_ref; 90 op->qf_fallback = qf_ref; 91 return CEED_ERROR_SUCCESS; 92 } 93 94 /** 95 @brief Select correct basis matrix pointer based on CeedEvalMode 96 97 @param[in] eval_mode Current basis evaluation mode 98 @param[in] identity Pointer to identity matrix 99 @param[in] interp Pointer to interpolation matrix 100 @param[in] grad Pointer to gradient matrix 101 @param[out] basis_ptr Basis pointer to set 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = 
CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 148 *pointblock_offsets; 149 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 151 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 152 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 153 CeedInt shift = num_comp; 154 if (comp_stride != 1) 155 shift *= num_comp; 156 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 157 CeedChk(ierr); 158 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 159 pointblock_offsets[i] = offsets[i]*shift; 160 if (pointblock_offsets[i] > max) 161 max = pointblock_offsets[i]; 162 } 163 164 // Create new restriction 165 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 166 1, max + num_comp*num_comp, CEED_MEM_HOST, 167 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 168 CeedChk(ierr); 169 170 // Cleanup 171 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 172 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else 181 CEED_REQUEST_IMMEDIATE 182 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 183 @param[out] assembled CeedVector to store assembled diagonal 184 185 @return An error code: 0 - success, otherwise - failure 186 187 @ref Developer 188 **/ 189 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 190 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 191 int ierr; 192 Ceed ceed; 193 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 194 195 // 
Assemble QFunction 196 CeedQFunction qf; 197 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 198 CeedInt num_input_fields, num_output_fields; 199 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 200 CeedChk(ierr); 201 CeedVector assembled_qf; 202 CeedElemRestriction rstr; 203 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 204 &rstr, request); CeedChk(ierr); 205 CeedInt layout[3]; 206 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 207 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 208 CeedScalar max_norm = 0; 209 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 210 211 // Determine active input basis 212 CeedOperatorField *op_fields; 213 CeedQFunctionField *qf_fields; 214 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 215 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 216 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 217 CeedEvalMode *eval_mode_in = NULL; 218 CeedBasis basis_in = NULL; 219 CeedElemRestriction rstr_in = NULL; 220 for (CeedInt i=0; i<num_input_fields; i++) { 221 CeedVector vec; 222 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 223 if (vec == CEED_VECTOR_ACTIVE) { 224 CeedElemRestriction rstr; 225 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 226 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 227 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 228 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 229 if (rstr_in && rstr_in != rstr) 230 // LCOV_EXCL_START 231 return CeedError(ceed, CEED_ERROR_BACKEND, 232 "Multi-field non-composite operator diagonal assembly not supported"); 233 // LCOV_EXCL_STOP 234 rstr_in = rstr; 235 CeedEvalMode eval_mode; 236 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 237 switch (eval_mode) { 
238 case CEED_EVAL_NONE: 239 case CEED_EVAL_INTERP: 240 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 241 eval_mode_in[num_eval_mode_in] = eval_mode; 242 num_eval_mode_in += 1; 243 break; 244 case CEED_EVAL_GRAD: 245 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 246 for (CeedInt d=0; d<dim; d++) 247 eval_mode_in[num_eval_mode_in+d] = eval_mode; 248 num_eval_mode_in += dim; 249 break; 250 case CEED_EVAL_WEIGHT: 251 case CEED_EVAL_DIV: 252 case CEED_EVAL_CURL: 253 break; // Caught by QF Assembly 254 } 255 } 256 } 257 258 // Determine active output basis 259 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 260 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 261 CeedInt num_eval_mode_out = 0; 262 CeedEvalMode *eval_mode_out = NULL; 263 CeedBasis basis_out = NULL; 264 CeedElemRestriction rstr_out = NULL; 265 for (CeedInt i=0; i<num_output_fields; i++) { 266 CeedVector vec; 267 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 268 if (vec == CEED_VECTOR_ACTIVE) { 269 CeedElemRestriction rstr; 270 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 271 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 272 if (rstr_out && rstr_out != rstr) 273 // LCOV_EXCL_START 274 return CeedError(ceed, CEED_ERROR_BACKEND, 275 "Multi-field non-composite operator diagonal assembly not supported"); 276 // LCOV_EXCL_STOP 277 rstr_out = rstr; 278 CeedEvalMode eval_mode; 279 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 280 switch (eval_mode) { 281 case CEED_EVAL_NONE: 282 case CEED_EVAL_INTERP: 283 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 284 eval_mode_out[num_eval_mode_out] = eval_mode; 285 num_eval_mode_out += 1; 286 break; 287 case CEED_EVAL_GRAD: 288 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 289 for 
(CeedInt d=0; d<dim; d++) 290 eval_mode_out[num_eval_mode_out+d] = eval_mode; 291 num_eval_mode_out += dim; 292 break; 293 case CEED_EVAL_WEIGHT: 294 case CEED_EVAL_DIV: 295 case CEED_EVAL_CURL: 296 break; // Caught by QF Assembly 297 } 298 } 299 } 300 301 // Assemble point block diagonal restriction, if needed 302 CeedElemRestriction diag_rstr = rstr_out; 303 if (is_pointblock) { 304 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 305 CeedChk(ierr); 306 } 307 308 // Create diagonal vector 309 CeedVector elem_diag; 310 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 311 CeedChk(ierr); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array; 315 const CeedScalar *assembled_qf_array; 316 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 317 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 318 CeedChk(ierr); 319 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 320 CeedChk(ierr); 321 CeedInt num_elem, num_nodes, num_qpts; 322 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 323 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 324 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 325 // Basis matrices 326 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 327 CeedScalar *identity = NULL; 328 bool evalNone = false; 329 for (CeedInt i=0; i<num_eval_mode_in; i++) 330 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 331 for (CeedInt i=0; i<num_eval_mode_out; i++) 332 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 333 if (evalNone) { 334 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 335 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 336 identity[i*num_nodes+i] = 1.0; 337 } 338 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 339 ierr = CeedBasisGetInterp(basis_out, &interp_out); 
CeedChk(ierr); 340 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 341 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 342 // Compute the diagonal of B^T D B 343 // Each element 344 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 345 for (CeedInt e=0; e<num_elem; e++) { 346 CeedInt d_out = -1; 347 // Each basis eval mode pair 348 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 349 const CeedScalar *bt = NULL; 350 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 351 d_out += 1; 352 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 353 &grad_out[d_out*num_qpts*num_nodes], &bt); 354 CeedInt d_in = -1; 355 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 356 const CeedScalar *b = NULL; 357 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 358 d_in += 1; 359 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 360 &grad_in[d_in*num_qpts*num_nodes], &b); 361 // Each component 362 for (CeedInt c_out=0; c_out<num_comp; c_out++) 363 // Each qpoint/node pair 364 for (CeedInt q=0; q<num_qpts; q++) 365 if (is_pointblock) { 366 // Point Block Diagonal 367 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 368 const CeedScalar qf_value = 369 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 370 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 371 if (fabs(qf_value) > qf_value_bound) 372 for (CeedInt n=0; n<num_nodes; n++) 373 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 374 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 375 } 376 } else { 377 // Diagonal Only 378 const CeedScalar qf_value = 379 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 380 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 381 if (fabs(qf_value) > qf_value_bound) 382 for (CeedInt n=0; n<num_nodes; n++) 383 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 384 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 385 } 386 } 387 } 388 } 389 ierr = 
CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 390 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 391 CeedChk(ierr); 392 393 // Assemble local operator diagonal 394 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 395 assembled, request); CeedChk(ierr); 396 397 // Cleanup 398 if (is_pointblock) { 399 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 400 } 401 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 402 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 403 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 404 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 405 ierr = CeedFree(&identity); CeedChk(ierr); 406 407 return CEED_ERROR_SUCCESS; 408 } 409 410 /** 411 @brief Core logic for assembling composite operator diagonal 412 413 @param[in] op CeedOperator to assemble point block diagonal 414 @param[in] request Address of CeedRequest for non-blocking completion, else 415 CEED_REQUEST_IMMEDIATE 416 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 417 @param[out] assembled CeedVector to store assembled diagonal 418 419 @return An error code: 0 - success, otherwise - failure 420 421 @ref Developer 422 **/ 423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 424 CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 int ierr; 427 CeedInt num_sub; 428 CeedOperator *suboperators; 429 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 430 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 431 for (CeedInt i = 0; i < num_sub; i++) { 432 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 433 is_pointblock, assembled); CeedChk(ierr); 434 } 435 return CEED_ERROR_SUCCESS; 436 } 437 438 /** 439 @brief Build nonzero pattern for non-composite operator 440 441 Users should generally use CeedOperatorLinearAssembleSymbolic() 442 443 @param[in] op CeedOperator to 
assemble nonzero pattern 444 @param[in] offset Offset for number of entries 445 @param[out] rows Row number for each entry 446 @param[out] cols Column number for each entry 447 448 @return An error code: 0 - success, otherwise - failure 449 450 @ref Developer 451 **/ 452 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 453 CeedInt *rows, CeedInt *cols) { 454 int ierr; 455 Ceed ceed = op->ceed; 456 if (op->is_composite) 457 // LCOV_EXCL_START 458 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 459 "Composite operator not supported"); 460 // LCOV_EXCL_STOP 461 462 CeedElemRestriction rstr_in; 463 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 464 CeedSize num_nodes; 465 CeedInt num_elem, elem_size, num_comp; 466 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 467 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 468 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 469 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 470 CeedInt layout_er[3]; 471 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 472 473 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 474 475 // Determine elem_dof relation 476 CeedVector index_vec; 477 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 478 CeedScalar *array; 479 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 480 for (CeedInt i = 0; i < num_nodes; ++i) { 481 array[i] = i; 482 } 483 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 484 CeedVector elem_dof; 485 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 486 CeedChk(ierr); 487 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 488 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 489 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 490 const 
CeedScalar *elem_dof_a; 491 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 492 CeedChk(ierr); 493 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 494 495 // Determine i, j locations for element matrices 496 CeedInt count = 0; 497 for (int e = 0; e < num_elem; ++e) { 498 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 499 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 500 for (int i = 0; i < elem_size; ++i) { 501 for (int j = 0; j < elem_size; ++j) { 502 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 503 (comp_out)*layout_er[1] + e*layout_er[2]; 504 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 505 (comp_in)*layout_er[1] + e*layout_er[2]; 506 507 const CeedInt row = elem_dof_a[elem_dof_index_row]; 508 const CeedInt col = elem_dof_a[elem_dof_index_col]; 509 510 rows[offset + count] = row; 511 cols[offset + count] = col; 512 count++; 513 } 514 } 515 } 516 } 517 } 518 if (count != local_num_entries) 519 // LCOV_EXCL_START 520 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 521 // LCOV_EXCL_STOP 522 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 523 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 524 525 return CEED_ERROR_SUCCESS; 526 } 527 528 /** 529 @brief Assemble nonzero entries for non-composite operator 530 531 Users should generally use CeedOperatorLinearAssemble() 532 533 @param[in] op CeedOperator to assemble 534 @param[out] offset Offest for number of entries 535 @param[out] values Values to assemble into matrix 536 537 @return An error code: 0 - success, otherwise - failure 538 539 @ref Developer 540 **/ 541 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 542 CeedVector values) { 543 int ierr; 544 Ceed ceed = op->ceed; 545 if (op->is_composite) 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 548 "Composite operator not supported"); 549 // LCOV_EXCL_STOP 550 551 // Assemble QFunction 552 
CeedQFunction qf; 553 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 554 CeedVector assembled_qf; 555 CeedElemRestriction rstr_q; 556 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 557 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 558 559 CeedSize qf_length; 560 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 561 562 CeedInt num_input_fields, num_output_fields; 563 CeedOperatorField *input_fields; 564 CeedOperatorField *output_fields; 565 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 566 &num_output_fields, &output_fields); CeedChk(ierr); 567 568 // Determine active input basis 569 CeedQFunctionField *qf_fields; 570 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 571 CeedInt num_eval_mode_in = 0, dim = 1; 572 CeedEvalMode *eval_mode_in = NULL; 573 CeedBasis basis_in = NULL; 574 CeedElemRestriction rstr_in = NULL; 575 for (CeedInt i=0; i<num_input_fields; i++) { 576 CeedVector vec; 577 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 578 if (vec == CEED_VECTOR_ACTIVE) { 579 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 580 CeedChk(ierr); 581 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 582 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 583 CeedChk(ierr); 584 CeedEvalMode eval_mode; 585 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 586 CeedChk(ierr); 587 switch (eval_mode) { 588 case CEED_EVAL_NONE: 589 case CEED_EVAL_INTERP: 590 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 591 eval_mode_in[num_eval_mode_in] = eval_mode; 592 num_eval_mode_in += 1; 593 break; 594 case CEED_EVAL_GRAD: 595 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 596 for (CeedInt d=0; d<dim; d++) { 597 eval_mode_in[num_eval_mode_in+d] = eval_mode; 598 } 599 num_eval_mode_in += dim; 600 break; 601 case CEED_EVAL_WEIGHT: 602 case 
CEED_EVAL_DIV: 603 case CEED_EVAL_CURL: 604 break; // Caught by QF Assembly 605 } 606 } 607 } 608 609 // Determine active output basis 610 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 611 CeedInt num_eval_mode_out = 0; 612 CeedEvalMode *eval_mode_out = NULL; 613 CeedBasis basis_out = NULL; 614 CeedElemRestriction rstr_out = NULL; 615 for (CeedInt i=0; i<num_output_fields; i++) { 616 CeedVector vec; 617 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 618 if (vec == CEED_VECTOR_ACTIVE) { 619 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 620 CeedChk(ierr); 621 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 622 CeedChk(ierr); 623 CeedEvalMode eval_mode; 624 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 625 CeedChk(ierr); 626 switch (eval_mode) { 627 case CEED_EVAL_NONE: 628 case CEED_EVAL_INTERP: 629 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 630 eval_mode_out[num_eval_mode_out] = eval_mode; 631 num_eval_mode_out += 1; 632 break; 633 case CEED_EVAL_GRAD: 634 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 635 for (CeedInt d=0; d<dim; d++) { 636 eval_mode_out[num_eval_mode_out+d] = eval_mode; 637 } 638 num_eval_mode_out += dim; 639 break; 640 case CEED_EVAL_WEIGHT: 641 case CEED_EVAL_DIV: 642 case CEED_EVAL_CURL: 643 break; // Caught by QF Assembly 644 } 645 } 646 } 647 648 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 649 // LCOV_EXCL_START 650 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 651 "Cannot assemble operator with out inputs/outputs"); 652 // LCOV_EXCL_STOP 653 654 CeedInt num_elem, elem_size, num_qpts, num_comp; 655 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 656 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 657 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 658 ierr = 
CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 659 660 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 661 662 // loop over elements and put in data structure 663 const CeedScalar *interp_in, *grad_in; 664 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 665 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 666 667 const CeedScalar *assembled_qf_array; 668 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 669 CeedChk(ierr); 670 671 CeedInt layout_qf[3]; 672 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 673 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 674 675 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 676 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 677 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 678 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 679 num_qpts]; // logically 3-tensor 680 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 681 CeedScalar elem_mat[elem_size * elem_size]; 682 int count = 0; 683 CeedScalar *vals; 684 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 685 for (int e = 0; e < num_elem; ++e) { 686 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 687 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 688 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 689 B_mat_in[ell] = 0.0; 690 } 691 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 692 B_mat_out[ell] = 0.0; 693 } 694 // Store block-diagonal D matrix as collection of small dense blocks 695 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 696 D_mat[ell] = 0.0; 697 } 698 // form element matrix itself (for each block component) 699 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 700 elem_mat[ell] = 0.0; 701 } 702 for (int q = 0; q < num_qpts; 
++q) { 703 for (int n = 0; n < elem_size; ++n) { 704 CeedInt d_in = -1; 705 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 706 const int qq = num_eval_mode_in*q; 707 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 708 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 709 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 710 d_in += 1; 711 B_mat_in[(qq+e_in)*elem_size + n] += 712 grad_in[(d_in*num_qpts+q) * elem_size + n]; 713 } else { 714 // LCOV_EXCL_START 715 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 716 // LCOV_EXCL_STOP 717 } 718 } 719 CeedInt d_out = -1; 720 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 721 const int qq = num_eval_mode_out*q; 722 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 723 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 724 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 725 d_out += 1; 726 B_mat_out[(qq+e_out)*elem_size + n] += 727 grad_in[(d_out*num_qpts+q) * elem_size + n]; 728 } else { 729 // LCOV_EXCL_START 730 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 731 // LCOV_EXCL_STOP 732 } 733 } 734 } 735 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 736 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 737 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 738 +comp_out; 739 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 740 e*layout_qf[2]; 741 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 742 } 743 } 744 } 745 // Compute B^T*D 746 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 747 BTD[ell] = 0.0; 748 } 749 for (int j = 0; j<elem_size; ++j) { 750 for (int q = 0; q<num_qpts; ++q) { 751 int qq = num_eval_mode_out*q; 752 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 753 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 754 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 755 B_mat_out[(qq+ej)*elem_size + j] * 
                   D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
          }
        }
      }
    }

    ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
                              elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);

    // put element matrix in coordinate data structure
    for (int i = 0; i < elem_size; ++i) {
      for (int j = 0; j < elem_size; ++j) {
        vals[offset + count] = elem_mat[i*elem_size + j];
        count++;
      }
    }
  }
  }
  }
  // Sanity check: every local entry must have been written exactly once
  if (count != local_num_entries)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
  // LCOV_EXCL_STOP
  ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);

  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Count number of entries for assembled CeedOperator

  @param[in] op            CeedOperator to assemble
  @param[out] num_entries  Number of entries in assembled representation

  @return An error code: 0 - success, otherwise - failure

  @ref Utility
**/
static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
    CeedInt *num_entries) {
  int ierr;
  CeedElemRestriction rstr;
  CeedInt num_elem, elem_size, num_comp;

  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP
  ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  // One dense (elem_size*num_comp)^2 block per element
  *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Common code for creating a multigrid coarse operator and level
           transfer operators for a CeedOperator

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] basis_c_to_f  Basis for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields; the active field is re-pointed at the coarse
  //    restriction/basis, all passive fields are shared with the fine operator
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone QFunctionAssemblyData
  ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
         &(*op_coarse)->qf_assembled); CeedChk(ierr);

  // Multiplicity vector: gather/scatter of ones counts how many elements
  // share each node; the reciprocal is used as a scaling in the transfers
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction: scale by inverse multiplicity, then interpolate fine -> coarse
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  // Context takes ownership of num_comp_r_data (CEED_OWN_POINTER)
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation: interpolate coarse -> fine, then scale by inverse multiplicity
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in] interp_1d    Interpolation matrix in one dimension
  @param[in] grad_1d      Gradient matrix in one dimension
  @param[in] q_weight_1d  Quadrature weights in one dimension
  @param[in] P_1d         Number of basis nodes in one dimension
  @param[in] Q_1d         Number of quadrature points in one dimension
  @param[in] dim          Dimension of basis
  @param[out] mass        Assembled mass matrix in one dimension
  @param[out] laplace     Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass: M_ij = sum_k B_ki w_k B_kj
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian: L_ij = sum_k G_ki w_k G_kj
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Diagonal perturbation keeps the (singular) 1D Laplacian invertible
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Backend API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorBackend
/// @{

/**
  @brief Create object holding CeedQFunction assembly data for CeedOperator

  @param[in] ceed   A Ceed object where the CeedQFunctionAssemblyData will be created
  @param[out] data  Address of the variable where the newly created
                      CeedQFunctionAssemblyData will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataCreate(Ceed ceed,
                                    CeedQFunctionAssemblyData *data) {
  int ierr;

  ierr = CeedCalloc(1, data); CeedChk(ierr);
  (*data)->ref_count = 1;
  (*data)->ceed = ceed;
  ierr = CeedReference(ceed); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Increment the reference counter for a CeedQFunctionAssemblyData

  @param data  CeedQFunctionAssemblyData to increment the reference counter

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
  data->ref_count++;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Mark QFunctionAssemblyData as
stale

  @param data              CeedQFunctionAssemblyData to mark as stale
  @param is_update_needed  Boolean flag indicating if update is needed or completed

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetQFunctionUpdated(CeedQFunctionAssemblyData data,
    bool is_update_needed) {
  // Remember that the update flag has ever been used; backends that never
  // set it always trigger re-assembly (see IsUpdateNeeded below)
  data->is_update_flag_used = data->is_update_flag_used || is_update_needed;
  data->is_update_needed = is_update_needed;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Determine if QFunctionAssemblyData needs update

  @param[in] data               CeedQFunctionAssemblyData to mark as stale
  @param[out] is_update_needed  Boolean flag indicating if re-assembly is required

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
    bool *is_update_needed) {
  // If the update flag was never used, conservatively always re-assemble
  *is_update_needed = !data->is_update_flag_used || data->is_update_needed;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
           be destroyed with `CeedQFunctionAssemblyDataDestroy()`;
         Note: If `*data_copy` is non-NULL, then it is assumed that
           `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
           CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
           the only reference to this CeedQFunctionAssemblyData.

  @param data            CeedQFunctionAssemblyData to copy reference to
  @param[out] data_copy  Variable to store copied reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
    CeedQFunctionAssemblyData *data_copy) {
  int ierr;

  // Take the new reference before dropping the old one so the assignment
  // below can never leave *data_copy dangling
  ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
  ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
  *data_copy = data;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get setup status for internal objects for CeedQFunctionAssemblyData

  @param[in] data       CeedQFunctionAssemblyData to retrieve status
  @param[out] is_setup  Boolean flag for setup status

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
    bool *is_setup) {
  *is_setup = data->is_setup;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set internal objects for CeedQFunctionAssemblyData

  @param[in] data  CeedQFunctionAssemblyData to set objects
  @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
  @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
    CeedVector vec, CeedElemRestriction rstr) {
  int ierr;

  ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);

  data->is_setup = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get internal objects for CeedQFunctionAssemblyData; copied references
           are returned and the caller is responsible for destroying them

  @param[in] data   CeedQFunctionAssemblyData to retrieve objects from
  @param[out] vec   Variable to store CeedVector holding the assembled CeedQFunction
  @param[out] rstr  Variable to store CeedElemRestriction for the assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
    CeedVector *vec, CeedElemRestriction *rstr) {
  int ierr;

  if (!data->is_setup)
    // LCOV_EXCL_START
    return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
                     "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
  // LCOV_EXCL_STOP

  ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Destroy CeedQFunctionAssemblyData

  @param[out] data  CeedQFunctionAssemblyData to destroy

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
  int ierr;

  // No-op unless this is the last reference
  if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;

  ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
  ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);

  ierr = CeedFree(data); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
1219 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1220 'v', provided in that order, would result in an assembled QFunction that 1221 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1222 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1223 1224 Note: Calling this function asserts that setup is complete 1225 and sets the CeedOperator as immutable. 1226 1227 @param op CeedOperator to assemble CeedQFunction 1228 @param[out] assembled CeedVector to store assembled CeedQFunction at 1229 quadrature points 1230 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1231 CeedQFunction 1232 @param request Address of CeedRequest for non-blocking completion, else 1233 @ref CEED_REQUEST_IMMEDIATE 1234 1235 @return An error code: 0 - success, otherwise - failure 1236 1237 @ref User 1238 **/ 1239 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1240 CeedElemRestriction *rstr, 1241 CeedRequest *request) { 1242 int ierr; 1243 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1244 1245 // Backend version 1246 if (op->LinearAssembleQFunction) { 1247 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1248 CeedChk(ierr); 1249 } else { 1250 // Fallback to reference Ceed 1251 if (!op->op_fallback) { 1252 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1253 } 1254 // Assemble 1255 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1256 rstr, request); CeedChk(ierr); 1257 } 1258 return CEED_ERROR_SUCCESS; 1259 } 1260 1261 /** 1262 @brief Assemble CeedQFunction and store result internall. Return copied 1263 references of stored data to the caller. Caller is responsible for 1264 ownership and destruction of the copied references. 
See also
           @ref CeedOperatorLinearAssembleQFunction

  @param op         CeedOperator to assemble CeedQFunction
  @param assembled  CeedVector to store assembled CeedQFunction at
                      quadrature points
  @param rstr       CeedElemRestriction for CeedVector containing assembled
                      CeedQFunction
  @param request    Address of CeedRequest for non-blocking completion, else
                      @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    bool qf_assembled_is_setup;
    CeedVector assembled_vec = NULL;
    CeedElemRestriction assembled_rstr = NULL;

    ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
                                            &qf_assembled_is_setup); CeedChk(ierr);
    if (qf_assembled_is_setup) {
      // Reuse the internally held vector/restriction; only re-run assembly
      // when the QFunction has been marked as updated
      ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
             &assembled_rstr); CeedChk(ierr);

      bool update_needed;
      ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
             &update_needed); CeedChk(ierr);
      if (update_needed) {
        ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
               request); CeedChk(ierr);
      }
    } else {
      // First assembly: build the objects and hand them to the assembly data
      // NOTE(review): this path calls op->LinearAssembleQFunction directly;
      // assumes any backend providing LinearAssembleQFunctionUpdate also
      // provides LinearAssembleQFunction — confirm against backend contract
      ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
             request); CeedChk(ierr);
      ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
             assembled_rstr); CeedChk(ierr);
    }
    ierr = CeedQFunctionAssemblyDataSetQFunctionUpdated(op->qf_assembled, false);
    CeedChk(ierr);

    // Copy reference to internally held copy; the local references taken
    // above are released so the caller ends up owning exactly one reference
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
    ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
    ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
    composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Zero first, then use the backend "add" variant
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
    composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.
1400 1401 @param op CeedOperator to assemble CeedQFunction 1402 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1403 @param request Address of CeedRequest for non-blocking completion, else 1404 @ref CEED_REQUEST_IMMEDIATE 1405 1406 @return An error code: 0 - success, otherwise - failure 1407 1408 @ref User 1409 **/ 1410 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1411 CeedRequest *request) { 1412 int ierr; 1413 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1414 1415 // Use backend version, if available 1416 if (op->LinearAssembleAddDiagonal) { 1417 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1418 return CEED_ERROR_SUCCESS; 1419 } else { 1420 // Check for valid fallback resource 1421 const char *resource, *fallback_resource; 1422 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1423 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1424 CeedChk(ierr); 1425 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1426 // Fallback to reference Ceed 1427 if (!op->op_fallback) { 1428 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1429 } 1430 // Assemble 1431 ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, 1432 request); CeedChk(ierr); 1433 return CEED_ERROR_SUCCESS; 1434 } 1435 } 1436 1437 // Default interface implementation 1438 bool is_composite; 1439 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1440 if (is_composite) { 1441 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1442 false, assembled); CeedChk(ierr); 1443 return CEED_ERROR_SUCCESS; 1444 } else { 1445 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1446 CeedChk(ierr); 1447 return CEED_ERROR_SUCCESS; 1448 } 1449 } 1450 1451 /** 1452 @brief Assemble the point block diagonal of a square linear CeedOperator 1453 1454 This overwrites a CeedVector with the point block 
diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
    composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Zero first, then sum via the "add" variant
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
    composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
1536 @param request Address of CeedRequest for non-blocking completion, else 1537 CEED_REQUEST_IMMEDIATE 1538 1539 @return An error code: 0 - success, otherwise - failure 1540 1541 @ref User 1542 **/ 1543 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, 1544 CeedVector assembled, CeedRequest *request) { 1545 int ierr; 1546 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1547 1548 // Use backend version, if available 1549 if (op->LinearAssembleAddPointBlockDiagonal) { 1550 ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); 1551 CeedChk(ierr); 1552 return CEED_ERROR_SUCCESS; 1553 } else { 1554 // Check for valid fallback resource 1555 const char *resource, *fallback_resource; 1556 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1557 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1558 CeedChk(ierr); 1559 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1560 // Fallback to reference Ceed 1561 if (!op->op_fallback) { 1562 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1563 } 1564 // Assemble 1565 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1566 assembled, request); CeedChk(ierr); 1567 return CEED_ERROR_SUCCESS; 1568 } 1569 } 1570 1571 // Default interface implemenation 1572 bool is_composite; 1573 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1574 if (is_composite) { 1575 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1576 true, assembled); CeedChk(ierr); 1577 return CEED_ERROR_SUCCESS; 1578 } else { 1579 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1580 CeedChk(ierr); 1581 return CEED_ERROR_SUCCESS; 1582 } 1583 } 1584 1585 /** 1586 @brief Fully assemble the nonzero pattern of a linear operator. 

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in] op            CeedOperator to assemble
  @param[out] num_entries  Number of entries in coordinate nonzero pattern
  @param[out] rows         Row number for each entry
  @param[out] cols         Column number for each entry

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  // NOTE(review): single_entries is CeedInt while *num_entries is CeedSize;
  // very large sub-operators could overflow the per-operator count — confirm
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations; sub-operator entries are laid out
  // consecutively, offset tracks where each sub-operator's span begins
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j).
Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in] op       CeedOperator to assemble
  @param[out] values  Values to assemble into matrix

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // offset must track the same ordering produced by
  // CeedOperatorLinearAssembleSymbolic() above
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator, creating the prolongation basis from the
           fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature
spaces"); 1792 // LCOV_EXCL_STOP 1793 1794 // Coarse to fine basis 1795 CeedInt P_f, P_c, Q = Q_f; 1796 bool is_tensor_f, is_tensor_c; 1797 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1798 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1799 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1800 if (is_tensor_f && is_tensor_c) { 1801 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1802 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1803 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1804 } else if (!is_tensor_f && !is_tensor_c) { 1805 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1806 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1807 } else { 1808 // LCOV_EXCL_START 1809 return CeedError(ceed, CEED_ERROR_MINOR, 1810 "Bases must both be tensor or non-tensor"); 1811 // LCOV_EXCL_STOP 1812 } 1813 1814 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1815 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1816 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1817 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1818 if (is_tensor_f) { 1819 memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]); 1820 memcpy(interp_c, basis_coarse->interp_1d, 1821 Q*P_c*sizeof basis_coarse->interp_1d[0]); 1822 } else { 1823 memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]); 1824 memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]); 1825 } 1826 1827 // -- QR Factorization, interp_f = Q R 1828 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1829 1830 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1831 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1832 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1833 1834 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1835 for (CeedInt j=0; j<P_c; j++) { // Column j 1836 
interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1837 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1838 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1839 for (CeedInt k=i+1; k<P_f; k++) 1840 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1841 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1842 } 1843 } 1844 ierr = CeedFree(&tau); CeedChk(ierr); 1845 ierr = CeedFree(&interp_c); CeedChk(ierr); 1846 ierr = CeedFree(&interp_f); CeedChk(ierr); 1847 1848 // Complete with interp_c_to_f versions of code 1849 if (is_tensor_f) { 1850 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1851 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1852 CeedChk(ierr); 1853 } else { 1854 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1855 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1856 CeedChk(ierr); 1857 } 1858 1859 // Cleanup 1860 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1861 return CEED_ERROR_SUCCESS; 1862 } 1863 1864 /** 1865 @brief Create a multigrid coarse operator and level transfer operators 1866 for a CeedOperator with a tensor basis for the active basis 1867 1868 Note: Calling this function asserts that setup is complete 1869 and sets the CeedOperator as immutable. 
1870 1871 @param[in] op_fine Fine grid operator 1872 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1873 @param[in] rstr_coarse Coarse grid restriction 1874 @param[in] basis_coarse Coarse grid active vector basis 1875 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1876 @param[out] op_coarse Coarse grid operator 1877 @param[out] op_prolong Coarse to fine operator 1878 @param[out] op_restrict Fine to coarse operator 1879 1880 @return An error code: 0 - success, otherwise - failure 1881 1882 @ref User 1883 **/ 1884 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1885 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1886 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1887 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1888 int ierr; 1889 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1890 Ceed ceed; 1891 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1892 1893 // Check for compatible quadrature spaces 1894 CeedBasis basis_fine; 1895 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1896 CeedInt Q_f, Q_c; 1897 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1898 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1899 if (Q_f != Q_c) 1900 // LCOV_EXCL_START 1901 return CeedError(ceed, CEED_ERROR_DIMENSION, 1902 "Bases must have compatible quadrature spaces"); 1903 // LCOV_EXCL_STOP 1904 1905 // Coarse to fine basis 1906 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1907 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1908 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1909 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1910 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1911 CeedChk(ierr); 1912 P_1d_c = dim == 1 ? num_nodes_c : 1913 dim == 2 ? 
sqrt(num_nodes_c) : 1914 cbrt(num_nodes_c); 1915 CeedScalar *q_ref, *q_weight, *grad; 1916 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1917 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1918 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1919 CeedBasis basis_c_to_f; 1920 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1921 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1922 CeedChk(ierr); 1923 ierr = CeedFree(&q_ref); CeedChk(ierr); 1924 ierr = CeedFree(&q_weight); CeedChk(ierr); 1925 ierr = CeedFree(&grad); CeedChk(ierr); 1926 1927 // Core code 1928 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1929 basis_coarse, basis_c_to_f, op_coarse, 1930 op_prolong, op_restrict); 1931 CeedChk(ierr); 1932 return CEED_ERROR_SUCCESS; 1933 } 1934 1935 /** 1936 @brief Create a multigrid coarse operator and level transfer operators 1937 for a CeedOperator with a non-tensor basis for the active vector 1938 1939 Note: Calling this function asserts that setup is complete 1940 and sets the CeedOperator as immutable. 
1941 1942 @param[in] op_fine Fine grid operator 1943 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1944 @param[in] rstr_coarse Coarse grid restriction 1945 @param[in] basis_coarse Coarse grid active vector basis 1946 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1947 @param[out] op_coarse Coarse grid operator 1948 @param[out] op_prolong Coarse to fine operator 1949 @param[out] op_restrict Fine to coarse operator 1950 1951 @return An error code: 0 - success, otherwise - failure 1952 1953 @ref User 1954 **/ 1955 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1956 CeedVector p_mult_fine, 1957 CeedElemRestriction rstr_coarse, 1958 CeedBasis basis_coarse, 1959 const CeedScalar *interp_c_to_f, 1960 CeedOperator *op_coarse, 1961 CeedOperator *op_prolong, 1962 CeedOperator *op_restrict) { 1963 int ierr; 1964 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1965 Ceed ceed; 1966 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1967 1968 // Check for compatible quadrature spaces 1969 CeedBasis basis_fine; 1970 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1971 CeedInt Q_f, Q_c; 1972 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1973 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1974 if (Q_f != Q_c) 1975 // LCOV_EXCL_START 1976 return CeedError(ceed, CEED_ERROR_DIMENSION, 1977 "Bases must have compatible quadrature spaces"); 1978 // LCOV_EXCL_STOP 1979 1980 // Coarse to fine basis 1981 CeedElemTopology topo; 1982 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1983 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1984 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1985 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1986 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 1987 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1988 
CeedChk(ierr); 1989 CeedScalar *q_ref, *q_weight, *grad; 1990 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 1991 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 1992 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 1993 CeedBasis basis_c_to_f; 1994 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 1995 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1996 CeedChk(ierr); 1997 ierr = CeedFree(&q_ref); CeedChk(ierr); 1998 ierr = CeedFree(&q_weight); CeedChk(ierr); 1999 ierr = CeedFree(&grad); CeedChk(ierr); 2000 2001 // Core code 2002 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2003 basis_coarse, basis_c_to_f, op_coarse, 2004 op_prolong, op_restrict); 2005 CeedChk(ierr); 2006 return CEED_ERROR_SUCCESS; 2007 } 2008 2009 /** 2010 @brief Build a FDM based approximate inverse for each element for a 2011 CeedOperator 2012 2013 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2014 Method based approximate inverse. This function obtains the simultaneous 2015 diagonalization for the 1D mass and Laplacian operators, 2016 M = V^T V, K = V^T S V. 2017 The assembled QFunction is used to modify the eigenvalues from simultaneous 2018 diagonalization and obtain an approximate inverse of the form 2019 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2020 associated CeedQFunction must therefore also be linear. 2021 2022 Note: Calling this function asserts that setup is complete 2023 and sets the CeedOperator as immutable. 
2024 2025 @param op CeedOperator to create element inverses 2026 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2027 for each element 2028 @param request Address of CeedRequest for non-blocking completion, else 2029 @ref CEED_REQUEST_IMMEDIATE 2030 2031 @return An error code: 0 - success, otherwise - failure 2032 2033 @ref User 2034 **/ 2035 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2036 CeedRequest *request) { 2037 int ierr; 2038 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2039 2040 // Use backend version, if available 2041 if (op->CreateFDMElementInverse) { 2042 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2043 return CEED_ERROR_SUCCESS; 2044 } else { 2045 // Check for valid fallback resource 2046 const char *resource, *fallback_resource; 2047 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2048 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2049 CeedChk(ierr); 2050 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2051 // Fallback to reference Ceed 2052 if (!op->op_fallback) { 2053 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2054 } 2055 // Assemble 2056 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2057 CeedChk(ierr); 2058 return CEED_ERROR_SUCCESS; 2059 } 2060 } 2061 2062 // Interface implementation 2063 Ceed ceed, ceed_parent; 2064 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2065 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2066 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2067 CeedQFunction qf; 2068 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2069 2070 // Determine active input basis 2071 bool interp = false, grad = false; 2072 CeedBasis basis = NULL; 2073 CeedElemRestriction rstr = NULL; 2074 CeedOperatorField *op_fields; 2075 CeedQFunctionField *qf_fields; 2076 CeedInt num_input_fields; 2077 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2078 CeedChk(ierr); 2079 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2080 for (CeedInt i=0; i<num_input_fields; i++) { 2081 CeedVector vec; 2082 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2083 if (vec == CEED_VECTOR_ACTIVE) { 2084 CeedEvalMode eval_mode; 2085 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2086 interp = interp || eval_mode == CEED_EVAL_INTERP; 2087 grad = grad || eval_mode == CEED_EVAL_GRAD; 2088 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2089 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2090 } 2091 } 2092 if (!basis) 2093 // LCOV_EXCL_START 2094 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2095 // LCOV_EXCL_STOP 2096 CeedSize l_size = 1; 2097 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2098 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2099 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2100 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2101 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2102 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2103 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2104 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2105 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2106 2107 // Build and diagonalize 1D Mass and Laplacian 2108 
bool tensor_basis; 2109 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2110 if (!tensor_basis) 2111 // LCOV_EXCL_START 2112 return CeedError(ceed, CEED_ERROR_BACKEND, 2113 "FDMElementInverse only supported for tensor " 2114 "bases"); 2115 // LCOV_EXCL_STOP 2116 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2117 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2118 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2119 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2120 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2121 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2122 // -- Build matrices 2123 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2124 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2125 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2126 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2127 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2128 mass, laplace); CeedChk(ierr); 2129 2130 // -- Diagonalize 2131 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2132 CeedChk(ierr); 2133 ierr = CeedFree(&mass); CeedChk(ierr); 2134 ierr = CeedFree(&laplace); CeedChk(ierr); 2135 for (CeedInt i=0; i<P_1d; i++) 2136 for (CeedInt j=0; j<P_1d; j++) 2137 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2138 ierr = CeedFree(&x); CeedChk(ierr); 2139 2140 // Assemble QFunction 2141 CeedVector assembled; 2142 CeedElemRestriction rstr_qf; 2143 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2144 &rstr_qf, request); CeedChk(ierr); 2145 CeedInt layout[3]; 2146 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2147 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2148 CeedScalar max_norm = 0; 2149 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2150 2151 // Calculate element averages 2152 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2153 CeedScalar 
*elem_avg; 2154 const CeedScalar *assembled_array, *q_weight_array; 2155 CeedVector q_weight; 2156 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2157 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2158 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2159 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2160 CeedChk(ierr); 2161 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2162 CeedChk(ierr); 2163 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2164 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2165 for (CeedInt e=0; e<num_elem; e++) { 2166 CeedInt count = 0; 2167 for (CeedInt q=0; q<num_qpts; q++) 2168 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2169 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2170 qf_value_bound) { 2171 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2172 q_weight_array[q]; 2173 count++; 2174 } 2175 if (count) { 2176 elem_avg[e] /= count; 2177 } else { 2178 elem_avg[e] = 1.0; 2179 } 2180 } 2181 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2182 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2183 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2184 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2185 2186 // Build FDM diagonal 2187 CeedVector q_data; 2188 CeedScalar *q_data_array, *fdm_diagonal; 2189 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2190 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2191 for (CeedInt c=0; c<num_comp; c++) 2192 for (CeedInt n=0; n<elem_size; n++) { 2193 if (interp) 2194 fdm_diagonal[c*elem_size + n] = 1.0; 2195 if (grad) 2196 for (CeedInt d=0; d<dim; d++) { 2197 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2198 fdm_diagonal[c*elem_size + n] += lambda[i]; 2199 } 2200 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2201 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2202 } 2203 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2204 CeedChk(ierr); 2205 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2206 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2207 CeedChk(ierr); 2208 for (CeedInt e=0; e<num_elem; e++) 2209 for (CeedInt c=0; c<num_comp; c++) 2210 for (CeedInt n=0; n<elem_size; n++) 2211 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2212 fdm_diagonal[c*elem_size + n]); 2213 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2214 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2215 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2216 2217 // Setup FDM operator 2218 // -- Basis 2219 CeedBasis fdm_basis; 2220 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2221 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2222 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2223 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2224 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2225 fdm_interp, grad_dummy, q_ref_dummy, 2226 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2227 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2228 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2229 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2230 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2231 ierr = CeedFree(&lambda); CeedChk(ierr); 2232 2233 // -- Restriction 2234 CeedElemRestriction rstr_qd_i; 2235 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2236 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2237 num_comp, num_elem*num_comp*elem_size, 2238 strides, &rstr_qd_i); CeedChk(ierr); 2239 // -- QFunction 2240 CeedQFunction qf_fdm; 2241 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2242 CeedChk(ierr); 2243 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2244 
CeedChk(ierr); 2245 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2246 CeedChk(ierr); 2247 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2248 CeedChk(ierr); 2249 // -- QFunction context 2250 CeedInt *num_comp_data; 2251 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2252 num_comp_data[0] = num_comp; 2253 CeedQFunctionContext ctx_fdm; 2254 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2255 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2256 sizeof(*num_comp_data), num_comp_data); 2257 CeedChk(ierr); 2258 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2259 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2260 // -- Operator 2261 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2262 CeedChk(ierr); 2263 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2264 CeedChk(ierr); 2265 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2266 q_data); CeedChk(ierr); 2267 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2268 CeedChk(ierr); 2269 2270 // Cleanup 2271 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2272 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2273 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2274 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2275 2276 return CEED_ERROR_SUCCESS; 2277 } 2278 2279 /// @} 2280