1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 ierr = 
CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 79 &op_ref->qf_assembled); CeedChk(ierr); 80 op->op_fallback = op_ref; 81 82 // Clone QF 83 CeedQFunction qf_ref; 84 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 85 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 86 qf_ref->data = NULL; 87 qf_ref->ceed = ceed_ref; 88 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 89 op_ref->qf = qf_ref; 90 op->qf_fallback = qf_ref; 91 return CEED_ERROR_SUCCESS; 92 } 93 94 /** 95 @brief Select correct basis matrix pointer based on CeedEvalMode 96 97 @param[in] eval_mode Current basis evaluation mode 98 @param[in] identity Pointer to identity matrix 99 @param[in] interp Pointer to interpolation matrix 100 @param[in] grad Pointer to gradient matrix 101 @param[out] basis_ptr Basis pointer to set 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = 
CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 148 *pointblock_offsets; 149 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 151 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 152 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 153 CeedInt shift = num_comp; 154 if (comp_stride != 1) 155 shift *= num_comp; 156 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 157 CeedChk(ierr); 158 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 159 pointblock_offsets[i] = offsets[i]*shift; 160 if (pointblock_offsets[i] > max) 161 max = pointblock_offsets[i]; 162 } 163 164 // Create new restriction 165 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 166 1, max + num_comp*num_comp, CEED_MEM_HOST, 167 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 168 CeedChk(ierr); 169 170 // Cleanup 171 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 172 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else 181 CEED_REQUEST_IMMEDIATE 182 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 183 @param[out] assembled CeedVector to store assembled diagonal 184 185 @return An error code: 0 - success, otherwise - failure 186 187 @ref Developer 188 **/ 189 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 190 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 191 int ierr; 192 Ceed ceed; 193 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 194 195 // 
/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction — gives the pointwise D matrix values at quadrature points
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf,
         &rstr, request); CeedChk(ierr);
  // layout[0..2] are the strides for (qpt, field component, element) in assembled_qf
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm used below to drop near-zero QF values (noise threshold)
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis; expand GRAD into `dim` eval-mode entries
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction for this assembly path
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // One entry per spatial dimension, so the e_in loop below walks
        // each gradient direction separately
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis — mirrors the input loop above
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector (element-local storage, scattered at the end)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array;
  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  // CEED_EVAL_NONE needs an identity "basis"; build it only if some field uses NONE
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // QF values at or below this bound are treated as numerical noise and skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // d_out tracks the gradient direction; it only advances on GRAD entries.
      // NOTE(review): for non-GRAD modes &grad_out[d_out*...] may form an
      // out-of-range address, but CeedOperatorGetBasisPointer never selects it
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal — accumulate full c_out x c_in blocks
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                                     num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only — c_in == c_out, so only the diagonal QF block is read
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                                   num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);

  // Assemble local operator diagonal — scatter element diagonals into L-vector
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup — diag_rstr was created here only in the point block case
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling composite operator diagonal

  Sums the (point block) diagonals of every suboperator into `assembled`.

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
  CeedOperator op, CeedRequest *request, const bool is_pointblock,
  CeedVector assembled) {
  int ierr;
  CeedInt num_sub;
  CeedOperator *suboperators;
  ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
  ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
  for (CeedInt i = 0; i < num_sub; i++) {
    ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
           is_pointblock, assembled); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}
assemble nonzero pattern 444 @param[in] offset Offset for number of entries 445 @param[out] rows Row number for each entry 446 @param[out] cols Column number for each entry 447 448 @return An error code: 0 - success, otherwise - failure 449 450 @ref Developer 451 **/ 452 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 453 CeedInt *rows, CeedInt *cols) { 454 int ierr; 455 Ceed ceed = op->ceed; 456 if (op->is_composite) 457 // LCOV_EXCL_START 458 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 459 "Composite operator not supported"); 460 // LCOV_EXCL_STOP 461 462 CeedElemRestriction rstr_in; 463 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 464 CeedSize num_nodes; 465 CeedInt num_elem, elem_size, num_comp; 466 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 467 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 468 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 469 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 470 CeedInt layout_er[3]; 471 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 472 473 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 474 475 // Determine elem_dof relation 476 CeedVector index_vec; 477 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 478 CeedScalar *array; 479 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 480 for (CeedInt i = 0; i < num_nodes; ++i) { 481 array[i] = i; 482 } 483 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 484 CeedVector elem_dof; 485 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 486 CeedChk(ierr); 487 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 488 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 489 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 490 const 
CeedScalar *elem_dof_a; 491 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 492 CeedChk(ierr); 493 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 494 495 // Determine i, j locations for element matrices 496 CeedInt count = 0; 497 for (int e = 0; e < num_elem; ++e) { 498 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 499 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 500 for (int i = 0; i < elem_size; ++i) { 501 for (int j = 0; j < elem_size; ++j) { 502 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 503 (comp_out)*layout_er[1] + e*layout_er[2]; 504 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 505 (comp_in)*layout_er[1] + e*layout_er[2]; 506 507 const CeedInt row = elem_dof_a[elem_dof_index_row]; 508 const CeedInt col = elem_dof_a[elem_dof_index_col]; 509 510 rows[offset + count] = row; 511 cols[offset + count] = col; 512 count++; 513 } 514 } 515 } 516 } 517 } 518 if (count != local_num_entries) 519 // LCOV_EXCL_START 520 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 521 // LCOV_EXCL_STOP 522 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 523 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 524 525 return CEED_ERROR_SUCCESS; 526 } 527 528 /** 529 @brief Assemble nonzero entries for non-composite operator 530 531 Users should generally use CeedOperatorLinearAssemble() 532 533 @param[in] op CeedOperator to assemble 534 @param[out] offset Offest for number of entries 535 @param[out] values Values to assemble into matrix 536 537 @return An error code: 0 - success, otherwise - failure 538 539 @ref Developer 540 **/ 541 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 542 CeedVector values) { 543 int ierr; 544 Ceed ceed = op->ceed; 545 if (op->is_composite) 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 548 "Composite operator not supported"); 549 // LCOV_EXCL_STOP 550 551 // Assemble QFunction 552 
/**
  @brief Assemble nonzero entries for non-composite operator

  Users should generally use CeedOperatorLinearAssemble()

  @param[in] op       CeedOperator to assemble
  @param[in] offset   Offset for number of entries
  @param[out] values  Values to assemble into matrix

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
    CeedVector values) {
  int ierr;
  Ceed ceed = op->ceed;
  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP

  // Assemble QFunction — pointwise D values at quadrature points
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr_q;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
           op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);

  CeedSize qf_length;
  ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);

  CeedInt num_input_fields, num_output_fields;
  CeedOperatorField *input_fields;
  CeedOperatorField *output_fields;
  ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
                               &num_output_fields, &output_fields); CeedChk(ierr);

  // Determine active input basis; GRAD expands into `dim` eval-mode entries
  CeedQFunctionField *qf_fields;
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
      CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        }
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis — mirrors the input loop above
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
      CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        }
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Cannot assemble operator with out inputs/outputs");
  // LCOV_EXCL_STOP

  CeedInt num_elem, elem_size, num_qpts, num_comp;
  ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);

  CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  // loop over elements and put in data structure
  // NOTE(review): only the *input* basis matrices are fetched and they are
  // also used to build the output B matrix below — this assumes the active
  // input and output bases coincide; TODO confirm for non-symmetric operators
  const CeedScalar *interp_in, *grad_in;
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);

  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);

  // layout_qf[0..2]: strides for (qpt, field component, element) in assembled_qf
  CeedInt layout_qf[3];
  ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);

  // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
  // NOTE(review): these are VLAs sized by runtime element data — large
  // elements could risk stack overflow; confirm sizes stay modest
  CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
  CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
  CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
                                     num_qpts]; // logically 3-tensor
  CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
  CeedScalar elem_mat[elem_size * elem_size];
  int count = 0;
  CeedScalar *vals;
  ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
  for (int e = 0; e < num_elem; ++e) {
    for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
      for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
        // Reset scratch matrices for this (element, comp_in, comp_out) block
        for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
          B_mat_in[ell] = 0.0;
        }
        for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
          B_mat_out[ell] = 0.0;
        }
        // Store block-diagonal D matrix as collection of small dense blocks
        for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
          D_mat[ell] = 0.0;
        }
        // form element matrix itself (for each block component)
        for (int ell = 0; ell < elem_size*elem_size; ++ell) {
          elem_mat[ell] = 0.0;
        }
        for (int q = 0; q < num_qpts; ++q) {
          for (int n = 0; n < elem_size; ++n) {
            // Fill B_mat_in: one row per (qpt, eval mode); d_in advances
            // through gradient directions on each GRAD entry
            CeedInt d_in = -1;
            for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
              const int qq = num_eval_mode_in*q;
              if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
                B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
                d_in += 1;
                B_mat_in[(qq+e_in)*elem_size + n] +=
                  grad_in[(d_in*num_qpts+q) * elem_size + n];
              } else {
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
            // Fill B_mat_out — built from interp_in/grad_in (see NOTE above)
            CeedInt d_out = -1;
            for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
              const int qq = num_eval_mode_out*q;
              if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
                B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
                d_out += 1;
                B_mat_out[(qq+e_out)*elem_size + n] +=
                  grad_in[(d_out*num_qpts+q) * elem_size + n];
              } else {
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
          }
          // Gather this component pair's D block from the assembled QFunction
          for (int ei = 0; ei < num_eval_mode_out; ++ei) {
            for (int ej = 0; ej < num_eval_mode_in; ++ej) {
              const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
                                          +comp_out;
              const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
                                e*layout_qf[2];
              D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
            }
          }
        }
        // Compute B^T*D
        for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
          BTD[ell] = 0.0;
        }
        for (int j = 0; j<elem_size; ++j) {
          for (int q = 0; q<num_qpts; ++q) {
            // NOTE(review): qq uses num_eval_mode_out while BTD is strided by
            // num_eval_mode_in — consistent only when the two counts match
            // (same-basis assumption); TODO confirm
            int qq = num_eval_mode_out*q;
            for (int ei = 0; ei < num_eval_mode_in; ++ei) {
              for (int ej = 0; ej < num_eval_mode_out; ++ej) {
                BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
                  B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
              }
            }
          }
        }

        // elem_mat = (B^T D) * B
        ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
                                  elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);

        // put element matrix in coordinate data structure
        for (int i = 0; i < elem_size; ++i) {
          for (int j = 0; j < elem_size; ++j) {
            vals[offset + count] = elem_mat[i*elem_size + j];
            count++;
          }
        }
      }
    }
  }
  // Sanity check: entry count must match the symbolic prediction
  if (count != local_num_entries)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
  // LCOV_EXCL_STOP
  ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);

  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Count number of entries for assembled CeedOperator

  @param[in] op            CeedOperator to assemble
  @param[out] num_entries  Number of entries in assembled representation

  @return An error code: 0 - success, otherwise - failure

  @ref Utility
**/
static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
    CeedInt *num_entries) {
  int ierr;
  CeedElemRestriction rstr;
  CeedInt num_elem, elem_size, num_comp;

  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP
  ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  // One dense (elem_size*num_comp)^2 block per element
  *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Common code for creating a multigrid coarse operator and level
           transfer operators for a CeedOperator

  Note: this routine takes ownership of @a basis_c_to_f and destroys it
  during cleanup, so the caller must not use it afterwards.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] basis_c_to_f  Basis for coarse to fine interpolation
                             (ownership transferred; destroyed here)
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  // The coarse operator reuses the fine QFunctions; only the active field's
  // restriction and basis are swapped for their coarse counterparts
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      // Remember the fine active restriction; reused below for the transfers
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone QFunctionAssemblyData
  ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
         &(*op_coarse)->qf_assembled); CeedChk(ierr);

  // Multiplicity vector
  // Gather/scatter of all-ones-like data counts how many elements share each
  // node; the reciprocal is used as a scale so transfers average shared dofs
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  // Context data is handed to the QFunctionContext with CEED_OWN_POINTER, so
  // it is not freed here
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in] interp_1d    Interpolation matrix in one dimension
  @param[in] grad_1d      Gradient matrix in one dimension
  @param[in] q_weight_1d  Quadrature weights in one dimension
  @param[in] P_1d         Number of basis nodes in one dimension
  @param[in] Q_1d         Number of quadrature points in one dimension
  @param[in] dim          Dimension of basis
  @param[out] mass        Assembled mass matrix in one dimension
  @param[out] laplace     Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {
  // -- Mass: mass = interp^T diag(w) interp
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian: laplace = grad^T diag(w) grad
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Shift the diagonal to keep the (otherwise singular) Laplacian invertible;
  // smaller perturbation in 3D
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Backend API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorBackend
/// @{

/**
  @brief Create object holding CeedQFunction assembly data for CeedOperator

  @param[in] ceed   A Ceed object where the CeedQFunctionAssemblyData will be created
  @param[out] data  Address of the variable where the newly created
                      CeedQFunctionAssemblyData will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataCreate(Ceed ceed,
                                    CeedQFunctionAssemblyData *data) {
  int ierr;

  ierr = CeedCalloc(1, data); CeedChk(ierr);
  (*data)->ref_count = 1;
  (*data)->ceed = ceed;
  ierr = CeedReference(ceed); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Increment the reference counter for a CeedQFunctionAssemblyData

  @param data  CeedQFunctionAssemblyData to increment the reference counter

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
  data->ref_count++;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Mark QFunctionAssemblyData as
stale

  @param data              CeedQFunctionAssemblyData to mark as stale
  @param is_update_needed  Boolean flag indicating if update is needed or completed

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetQFunctionUpdated(CeedQFunctionAssemblyData data,
    bool is_update_needed) {
  // Record that the update flag has ever been set, so IsUpdateNeeded can
  // distinguish "never tracked" from "tracked and up to date"
  data->is_update_flag_used = data->is_update_flag_used || is_update_needed;
  data->is_update_needed = is_update_needed;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Determine if QFunctionAssemblyData needs update

  @param[in] data              CeedQFunctionAssemblyData to query
  @param[out] is_update_needed Boolean flag indicating if re-assembly is required

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
    bool *is_update_needed) {
  // If the flag has never been used, conservatively report an update is needed
  *is_update_needed = !data->is_update_flag_used || data->is_update_needed;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
           be destroyed with `CeedQFunctionAssemblyDataDestroy()`;
           Note: If `*data_copy` is non-NULL, then it is assumed that
           `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
           CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
           the only reference to this CeedQFunctionAssemblyData.

  @param data             CeedQFunctionAssemblyData to copy reference to
  @param[out] data_copy   Variable to store copied reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
    CeedQFunctionAssemblyData *data_copy) {
  int ierr;

  // Increment before destroying the old reference so self-copy stays safe
  ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
  ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
  *data_copy = data;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get setup status for internal objects for CeedQFunctionAssemblyData

  @param[in] data       CeedQFunctionAssemblyData to retrieve status
  @param[out] is_setup  Boolean flag for setup status

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
                                     bool *is_setup) {
  *is_setup = data->is_setup;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set internal objects for CeedQFunctionAssemblyData

  Takes ownership of @a vec and @a rstr; any previously held objects are
  destroyed first.

  @param[in] data  CeedQFunctionAssemblyData to set objects
  @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
  @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector vec, CeedElemRestriction rstr) {
  int ierr;

  ierr = CeedVectorDestroy(&data->vec); CeedChk(ierr);
  data->vec = vec;

  ierr = CeedElemRestrictionDestroy(&data->rstr); CeedChk(ierr);
  data->rstr = rstr;

  data->is_setup = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get internal objects for CeedQFunctionAssemblyData

  Returns borrowed references to the internally held objects; errors with
  CEED_ERROR_INCOMPLETE if CeedQFunctionAssemblyDataSetObjects() was not
  called first.

  @param[in] data   CeedQFunctionAssemblyData to retrieve objects from
  @param[out] vec   CeedVector holding assembled CeedQFunction at quadrature points
  @param[out] rstr  CeedElemRestriction for the assembled CeedQFunction vector

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector *vec,
                                        CeedElemRestriction *rstr) {
  if (!data->is_setup)
    // LCOV_EXCL_START
    return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
                     "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
  // LCOV_EXCL_STOP

  *vec = data->vec;
  *rstr = data->rstr;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Destroy CeedQFunctionAssemblyData

  @param[out] data  CeedQFunctionAssemblyData to destroy

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
  int ierr;

  // Only free when the last reference is released
  if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;

  ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
  ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);

  ierr = CeedFree(data); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
  providing the action of the CeedQFunction associated with the CeedOperator.
  The vector 'assembled' is of shape
    [num_elements, num_input_fields, num_output_fields, num_quad_points]
  and contains column-major matrices representing the action of the
  CeedQFunction for a corresponding quadrature point on an element. Inputs and
  outputs are in the order provided by the user when adding CeedOperator fields.
  For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
  'v', provided in that order, would result in an assembled QFunction that
  consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
  on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedQFunction at
                           quadrature points
  @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
                           CeedQFunction
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
                                        CeedElemRestriction *rstr,
                                        CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunction) {
    ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
    CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
           rstr, request); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble CeedQFunction and store result internally. Return copied
           references of stored data to the caller. Caller is responsible for
           ownership and destruction of the copied references. See also
           @ref CeedOperatorLinearAssembleQFunction

  @param op              CeedOperator to assemble CeedQFunction
  @param assembled       CeedVector to store assembled CeedQFunction at
                           quadrature points
  @param rstr            CeedElemRestriction for CeedVector containing assembled
                           CeedQFunction
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    bool qf_assembled_is_setup;
    CeedVector assembled_vec;
    CeedElemRestriction assembled_rstr;

    ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
                                            &qf_assembled_is_setup); CeedChk(ierr);
    if (qf_assembled_is_setup) {
      // Internal objects already exist; re-assemble in place only when stale
      ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
             &assembled_rstr); CeedChk(ierr);

      bool update_needed;
      ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
             &update_needed); CeedChk(ierr);
      if (update_needed) {
        ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
               request); CeedChk(ierr);
      }
    } else {
      // First assembly; build the objects and hand ownership to the
      // QFunctionAssemblyData
      ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
                                         request); CeedChk(ierr);
      ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
             assembled_rstr); CeedChk(ierr);
    }
    ierr = CeedQFunctionAssemblyDataSetQFunctionUpdated(op->qf_assembled, false);
    CeedChk(ierr);
    // Copy reference to internally held copy
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Backend provides additive assembly only; zero first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
                                          CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    // An empty fallback resource or one matching the current resource means
    // no fallback is possible; use the default implementation below
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
  CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Backend provides additive assembly only; zero first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
  CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
  form (i, j, value) which indicate that value should be added to the matrix
  in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
  This function returns the number of entries and their (i, j) locations,
  while CeedOperatorLinearAssemble() provides the values in the same
  ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op            CeedOperator to assemble
  @param[out] num_entries  Number of entries in coordinate nonzero pattern
  @param[out] rows         Row number for each entry (allocated here; caller frees)
  @param[out] cols         Column number for each entry (allocated here; caller frees)

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  // NOTE(review): single_entries is CeedInt while *num_entries is CeedSize —
  // per-sub-operator counts are assumed to fit in CeedInt
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations
  // Each sub-operator writes into its own slice of rows/cols at 'offset'
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
  form (i, j, value) which indicate that value should be added to the matrix
  in entry (i, j).
  Note that the (i, j) pairs are not unique and may repeat.
  This function returns the values of the nonzero entries to be added, their
  (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op      CeedOperator to assemble
  @param[out] values Values to assemble into matrix

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble via the fallback operator; recursion ends because the
      // fallback's resource matches fallback_resource
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // For composite operators, each suboperator writes its entries at a
  // running offset into the COO values vector, matching the ordering
  // produced by CeedOperatorLinearAssembleSymbolic()
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator, creating the prolongation basis from the
           fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // Tensor bases work with 1D node/quadrature counts; non-tensor bases use
  // total counts. Mixing the two kinds is rejected below.
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work copies of the interpolation matrices (QR below is in-place)
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Solve the least-squares problem interp_f * interp_c_to_f = interp_c
  // via QR: interp_c_to_f = R^-1 Q^T interp_c

  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  // Back substitution against the upper-triangular R stored in interp_f,
  // one column of interp_c_to_f at a time, bottom row upward
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1868 1869 @param[in] op_fine Fine grid operator 1870 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1871 @param[in] rstr_coarse Coarse grid restriction 1872 @param[in] basis_coarse Coarse grid active vector basis 1873 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1874 @param[out] op_coarse Coarse grid operator 1875 @param[out] op_prolong Coarse to fine operator 1876 @param[out] op_restrict Fine to coarse operator 1877 1878 @return An error code: 0 - success, otherwise - failure 1879 1880 @ref User 1881 **/ 1882 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1883 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1884 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1885 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1886 int ierr; 1887 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1888 Ceed ceed; 1889 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1890 1891 // Check for compatible quadrature spaces 1892 CeedBasis basis_fine; 1893 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1894 CeedInt Q_f, Q_c; 1895 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1896 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1897 if (Q_f != Q_c) 1898 // LCOV_EXCL_START 1899 return CeedError(ceed, CEED_ERROR_DIMENSION, 1900 "Bases must have compatible quadrature spaces"); 1901 // LCOV_EXCL_STOP 1902 1903 // Coarse to fine basis 1904 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1905 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1906 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1907 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1908 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1909 CeedChk(ierr); 1910 P_1d_c = dim == 1 ? num_nodes_c : 1911 dim == 2 ? 
sqrt(num_nodes_c) : 1912 cbrt(num_nodes_c); 1913 CeedScalar *q_ref, *q_weight, *grad; 1914 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1915 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1916 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1917 CeedBasis basis_c_to_f; 1918 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1919 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1920 CeedChk(ierr); 1921 ierr = CeedFree(&q_ref); CeedChk(ierr); 1922 ierr = CeedFree(&q_weight); CeedChk(ierr); 1923 ierr = CeedFree(&grad); CeedChk(ierr); 1924 1925 // Core code 1926 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1927 basis_coarse, basis_c_to_f, op_coarse, 1928 op_prolong, op_restrict); 1929 CeedChk(ierr); 1930 return CEED_ERROR_SUCCESS; 1931 } 1932 1933 /** 1934 @brief Create a multigrid coarse operator and level transfer operators 1935 for a CeedOperator with a non-tensor basis for the active vector 1936 1937 Note: Calling this function asserts that setup is complete 1938 and sets the CeedOperator as immutable. 
1939 1940 @param[in] op_fine Fine grid operator 1941 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1942 @param[in] rstr_coarse Coarse grid restriction 1943 @param[in] basis_coarse Coarse grid active vector basis 1944 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1945 @param[out] op_coarse Coarse grid operator 1946 @param[out] op_prolong Coarse to fine operator 1947 @param[out] op_restrict Fine to coarse operator 1948 1949 @return An error code: 0 - success, otherwise - failure 1950 1951 @ref User 1952 **/ 1953 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1954 CeedVector p_mult_fine, 1955 CeedElemRestriction rstr_coarse, 1956 CeedBasis basis_coarse, 1957 const CeedScalar *interp_c_to_f, 1958 CeedOperator *op_coarse, 1959 CeedOperator *op_prolong, 1960 CeedOperator *op_restrict) { 1961 int ierr; 1962 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1963 Ceed ceed; 1964 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1965 1966 // Check for compatible quadrature spaces 1967 CeedBasis basis_fine; 1968 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1969 CeedInt Q_f, Q_c; 1970 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1971 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1972 if (Q_f != Q_c) 1973 // LCOV_EXCL_START 1974 return CeedError(ceed, CEED_ERROR_DIMENSION, 1975 "Bases must have compatible quadrature spaces"); 1976 // LCOV_EXCL_STOP 1977 1978 // Coarse to fine basis 1979 CeedElemTopology topo; 1980 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1981 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1982 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1983 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1984 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 1985 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1986 
CeedChk(ierr); 1987 CeedScalar *q_ref, *q_weight, *grad; 1988 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 1989 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 1990 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 1991 CeedBasis basis_c_to_f; 1992 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 1993 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1994 CeedChk(ierr); 1995 ierr = CeedFree(&q_ref); CeedChk(ierr); 1996 ierr = CeedFree(&q_weight); CeedChk(ierr); 1997 ierr = CeedFree(&grad); CeedChk(ierr); 1998 1999 // Core code 2000 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2001 basis_coarse, basis_c_to_f, op_coarse, 2002 op_prolong, op_restrict); 2003 CeedChk(ierr); 2004 return CEED_ERROR_SUCCESS; 2005 } 2006 2007 /** 2008 @brief Build a FDM based approximate inverse for each element for a 2009 CeedOperator 2010 2011 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2012 Method based approximate inverse. This function obtains the simultaneous 2013 diagonalization for the 1D mass and Laplacian operators, 2014 M = V^T V, K = V^T S V. 2015 The assembled QFunction is used to modify the eigenvalues from simultaneous 2016 diagonalization and obtain an approximate inverse of the form 2017 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2018 associated CeedQFunction must therefore also be linear. 2019 2020 Note: Calling this function asserts that setup is complete 2021 and sets the CeedOperator as immutable. 
2022 2023 @param op CeedOperator to create element inverses 2024 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2025 for each element 2026 @param request Address of CeedRequest for non-blocking completion, else 2027 @ref CEED_REQUEST_IMMEDIATE 2028 2029 @return An error code: 0 - success, otherwise - failure 2030 2031 @ref User 2032 **/ 2033 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2034 CeedRequest *request) { 2035 int ierr; 2036 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2037 2038 // Use backend version, if available 2039 if (op->CreateFDMElementInverse) { 2040 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2041 return CEED_ERROR_SUCCESS; 2042 } else { 2043 // Check for valid fallback resource 2044 const char *resource, *fallback_resource; 2045 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2046 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2047 CeedChk(ierr); 2048 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2049 // Fallback to reference Ceed 2050 if (!op->op_fallback) { 2051 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2052 } 2053 // Assemble 2054 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2055 CeedChk(ierr); 2056 return CEED_ERROR_SUCCESS; 2057 } 2058 } 2059 2060 // Interface implementation 2061 Ceed ceed, ceed_parent; 2062 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2063 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2064 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2065 CeedQFunction qf; 2066 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2067 2068 // Determine active input basis 2069 bool interp = false, grad = false; 2070 CeedBasis basis = NULL; 2071 CeedElemRestriction rstr = NULL; 2072 CeedOperatorField *op_fields; 2073 CeedQFunctionField *qf_fields; 2074 CeedInt num_input_fields; 2075 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2076 CeedChk(ierr); 2077 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2078 for (CeedInt i=0; i<num_input_fields; i++) { 2079 CeedVector vec; 2080 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2081 if (vec == CEED_VECTOR_ACTIVE) { 2082 CeedEvalMode eval_mode; 2083 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2084 interp = interp || eval_mode == CEED_EVAL_INTERP; 2085 grad = grad || eval_mode == CEED_EVAL_GRAD; 2086 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2087 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2088 } 2089 } 2090 if (!basis) 2091 // LCOV_EXCL_START 2092 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2093 // LCOV_EXCL_STOP 2094 CeedSize l_size = 1; 2095 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2096 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2097 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2098 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2099 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2100 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2101 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2102 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2103 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2104 2105 // Build and diagonalize 1D Mass and Laplacian 2106 
bool tensor_basis; 2107 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2108 if (!tensor_basis) 2109 // LCOV_EXCL_START 2110 return CeedError(ceed, CEED_ERROR_BACKEND, 2111 "FDMElementInverse only supported for tensor " 2112 "bases"); 2113 // LCOV_EXCL_STOP 2114 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2115 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2116 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2117 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2118 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2119 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2120 // -- Build matrices 2121 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2122 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2123 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2124 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2125 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2126 mass, laplace); CeedChk(ierr); 2127 2128 // -- Diagonalize 2129 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2130 CeedChk(ierr); 2131 ierr = CeedFree(&mass); CeedChk(ierr); 2132 ierr = CeedFree(&laplace); CeedChk(ierr); 2133 for (CeedInt i=0; i<P_1d; i++) 2134 for (CeedInt j=0; j<P_1d; j++) 2135 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2136 ierr = CeedFree(&x); CeedChk(ierr); 2137 2138 // Assemble QFunction 2139 CeedVector assembled; 2140 CeedElemRestriction rstr_qf; 2141 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2142 &rstr_qf, request); CeedChk(ierr); 2143 CeedInt layout[3]; 2144 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2145 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2146 CeedScalar max_norm = 0; 2147 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2148 2149 // Calculate element averages 2150 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2151 CeedScalar 
*elem_avg; 2152 const CeedScalar *assembled_array, *q_weight_array; 2153 CeedVector q_weight; 2154 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2155 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2156 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2157 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2158 CeedChk(ierr); 2159 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2160 CeedChk(ierr); 2161 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2162 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2163 for (CeedInt e=0; e<num_elem; e++) { 2164 CeedInt count = 0; 2165 for (CeedInt q=0; q<num_qpts; q++) 2166 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2167 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2168 qf_value_bound) { 2169 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2170 q_weight_array[q]; 2171 count++; 2172 } 2173 if (count) { 2174 elem_avg[e] /= count; 2175 } else { 2176 elem_avg[e] = 1.0; 2177 } 2178 } 2179 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2180 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2181 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2182 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2183 2184 // Build FDM diagonal 2185 CeedVector q_data; 2186 CeedScalar *q_data_array, *fdm_diagonal; 2187 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2188 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2189 for (CeedInt c=0; c<num_comp; c++) 2190 for (CeedInt n=0; n<elem_size; n++) { 2191 if (interp) 2192 fdm_diagonal[c*elem_size + n] = 1.0; 2193 if (grad) 2194 for (CeedInt d=0; d<dim; d++) { 2195 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2196 fdm_diagonal[c*elem_size + n] += lambda[i]; 2197 } 2198 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2199 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2200 } 2201 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2202 CeedChk(ierr); 2203 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2204 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2205 CeedChk(ierr); 2206 for (CeedInt e=0; e<num_elem; e++) 2207 for (CeedInt c=0; c<num_comp; c++) 2208 for (CeedInt n=0; n<elem_size; n++) 2209 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2210 fdm_diagonal[c*elem_size + n]); 2211 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2212 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2213 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2214 2215 // Setup FDM operator 2216 // -- Basis 2217 CeedBasis fdm_basis; 2218 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2219 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2220 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2221 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2222 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2223 fdm_interp, grad_dummy, q_ref_dummy, 2224 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2225 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2226 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2227 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2228 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2229 ierr = CeedFree(&lambda); CeedChk(ierr); 2230 2231 // -- Restriction 2232 CeedElemRestriction rstr_qd_i; 2233 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2234 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2235 num_comp, num_elem*num_comp*elem_size, 2236 strides, &rstr_qd_i); CeedChk(ierr); 2237 // -- QFunction 2238 CeedQFunction qf_fdm; 2239 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2240 CeedChk(ierr); 2241 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2242 
CeedChk(ierr); 2243 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2244 CeedChk(ierr); 2245 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2246 CeedChk(ierr); 2247 // -- QFunction context 2248 CeedInt *num_comp_data; 2249 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2250 num_comp_data[0] = num_comp; 2251 CeedQFunctionContext ctx_fdm; 2252 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2253 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2254 sizeof(*num_comp_data), num_comp_data); 2255 CeedChk(ierr); 2256 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2257 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2258 // -- Operator 2259 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2260 CeedChk(ierr); 2261 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2262 CeedChk(ierr); 2263 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2264 q_data); CeedChk(ierr); 2265 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2266 CeedChk(ierr); 2267 2268 // Cleanup 2269 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2270 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2271 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2272 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2273 2274 return CEED_ERROR_SUCCESS; 2275 } 2276 2277 /// @} 2278