// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 ierr = 
CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 79 &op_ref->qf_assembled); CeedChk(ierr); 80 op->op_fallback = op_ref; 81 82 // Clone QF 83 CeedQFunction qf_ref; 84 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 85 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 86 qf_ref->data = NULL; 87 qf_ref->ceed = ceed_ref; 88 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 89 op_ref->qf = qf_ref; 90 op->qf_fallback = qf_ref; 91 return CEED_ERROR_SUCCESS; 92 } 93 94 /** 95 @brief Select correct basis matrix pointer based on CeedEvalMode 96 97 @param[in] eval_mode Current basis evaluation mode 98 @param[in] identity Pointer to identity matrix 99 @param[in] interp Pointer to interpolation matrix 100 @param[in] grad Pointer to gradient matrix 101 @param[out] basis_ptr Basis pointer to set 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = 
CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 144 CeedChk(ierr); 145 146 // Expand offsets 147 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 148 *pointblock_offsets; 149 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 150 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 151 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 152 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 153 CeedInt shift = num_comp; 154 if (comp_stride != 1) 155 shift *= num_comp; 156 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 157 CeedChk(ierr); 158 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 159 pointblock_offsets[i] = offsets[i]*shift; 160 if (pointblock_offsets[i] > max) 161 max = pointblock_offsets[i]; 162 } 163 164 // Create new restriction 165 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 166 1, max + num_comp*num_comp, CEED_MEM_HOST, 167 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 168 CeedChk(ierr); 169 170 // Cleanup 171 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 172 173 return CEED_ERROR_SUCCESS; 174 } 175 176 /** 177 @brief Core logic for assembling operator diagonal or point block diagonal 178 179 @param[in] op CeedOperator to assemble point block diagonal 180 @param[in] request Address of CeedRequest for non-blocking completion, else 181 CEED_REQUEST_IMMEDIATE 182 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 183 @param[out] assembled CeedVector to store assembled diagonal 184 185 @return An error code: 0 - success, otherwise - failure 186 187 @ref Developer 188 **/ 189 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 190 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 191 int ierr; 192 Ceed ceed; 193 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 194 195 // 
Assemble QFunction 196 CeedQFunction qf; 197 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 198 CeedInt num_input_fields, num_output_fields; 199 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 200 CeedChk(ierr); 201 CeedVector assembled_qf; 202 CeedElemRestriction rstr; 203 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 204 &rstr, request); CeedChk(ierr); 205 CeedInt layout[3]; 206 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 207 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 208 CeedScalar max_norm = 0; 209 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 210 211 // Determine active input basis 212 CeedOperatorField *op_fields; 213 CeedQFunctionField *qf_fields; 214 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 215 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 216 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 217 CeedEvalMode *eval_mode_in = NULL; 218 CeedBasis basis_in = NULL; 219 CeedElemRestriction rstr_in = NULL; 220 for (CeedInt i=0; i<num_input_fields; i++) { 221 CeedVector vec; 222 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 223 if (vec == CEED_VECTOR_ACTIVE) { 224 CeedElemRestriction rstr; 225 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 226 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 227 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 228 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 229 if (rstr_in && rstr_in != rstr) 230 // LCOV_EXCL_START 231 return CeedError(ceed, CEED_ERROR_BACKEND, 232 "Multi-field non-composite operator diagonal assembly not supported"); 233 // LCOV_EXCL_STOP 234 rstr_in = rstr; 235 CeedEvalMode eval_mode; 236 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 237 switch (eval_mode) { 
238 case CEED_EVAL_NONE: 239 case CEED_EVAL_INTERP: 240 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 241 eval_mode_in[num_eval_mode_in] = eval_mode; 242 num_eval_mode_in += 1; 243 break; 244 case CEED_EVAL_GRAD: 245 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 246 for (CeedInt d=0; d<dim; d++) 247 eval_mode_in[num_eval_mode_in+d] = eval_mode; 248 num_eval_mode_in += dim; 249 break; 250 case CEED_EVAL_WEIGHT: 251 case CEED_EVAL_DIV: 252 case CEED_EVAL_CURL: 253 break; // Caught by QF Assembly 254 } 255 } 256 } 257 258 // Determine active output basis 259 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 260 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 261 CeedInt num_eval_mode_out = 0; 262 CeedEvalMode *eval_mode_out = NULL; 263 CeedBasis basis_out = NULL; 264 CeedElemRestriction rstr_out = NULL; 265 for (CeedInt i=0; i<num_output_fields; i++) { 266 CeedVector vec; 267 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 268 if (vec == CEED_VECTOR_ACTIVE) { 269 CeedElemRestriction rstr; 270 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 271 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 272 if (rstr_out && rstr_out != rstr) 273 // LCOV_EXCL_START 274 return CeedError(ceed, CEED_ERROR_BACKEND, 275 "Multi-field non-composite operator diagonal assembly not supported"); 276 // LCOV_EXCL_STOP 277 rstr_out = rstr; 278 CeedEvalMode eval_mode; 279 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 280 switch (eval_mode) { 281 case CEED_EVAL_NONE: 282 case CEED_EVAL_INTERP: 283 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 284 eval_mode_out[num_eval_mode_out] = eval_mode; 285 num_eval_mode_out += 1; 286 break; 287 case CEED_EVAL_GRAD: 288 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 289 for 
(CeedInt d=0; d<dim; d++) 290 eval_mode_out[num_eval_mode_out+d] = eval_mode; 291 num_eval_mode_out += dim; 292 break; 293 case CEED_EVAL_WEIGHT: 294 case CEED_EVAL_DIV: 295 case CEED_EVAL_CURL: 296 break; // Caught by QF Assembly 297 } 298 } 299 } 300 301 // Assemble point block diagonal restriction, if needed 302 CeedElemRestriction diag_rstr = rstr_out; 303 if (is_pointblock) { 304 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 305 CeedChk(ierr); 306 } 307 308 // Create diagonal vector 309 CeedVector elem_diag; 310 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 311 CeedChk(ierr); 312 313 // Assemble element operator diagonals 314 CeedScalar *elem_diag_array; 315 const CeedScalar *assembled_qf_array; 316 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 317 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 318 CeedChk(ierr); 319 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 320 CeedChk(ierr); 321 CeedInt num_elem, num_nodes, num_qpts; 322 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 323 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 324 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 325 // Basis matrices 326 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 327 CeedScalar *identity = NULL; 328 bool evalNone = false; 329 for (CeedInt i=0; i<num_eval_mode_in; i++) 330 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 331 for (CeedInt i=0; i<num_eval_mode_out; i++) 332 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 333 if (evalNone) { 334 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 335 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 336 identity[i*num_nodes+i] = 1.0; 337 } 338 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 339 ierr = CeedBasisGetInterp(basis_out, &interp_out); 
CeedChk(ierr); 340 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 341 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 342 // Compute the diagonal of B^T D B 343 // Each element 344 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 345 for (CeedInt e=0; e<num_elem; e++) { 346 CeedInt d_out = -1; 347 // Each basis eval mode pair 348 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 349 const CeedScalar *bt = NULL; 350 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 351 d_out += 1; 352 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 353 &grad_out[d_out*num_qpts*num_nodes], &bt); 354 CeedInt d_in = -1; 355 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 356 const CeedScalar *b = NULL; 357 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 358 d_in += 1; 359 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 360 &grad_in[d_in*num_qpts*num_nodes], &b); 361 // Each component 362 for (CeedInt c_out=0; c_out<num_comp; c_out++) 363 // Each qpoint/node pair 364 for (CeedInt q=0; q<num_qpts; q++) 365 if (is_pointblock) { 366 // Point Block Diagonal 367 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 368 const CeedScalar qf_value = 369 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 370 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 371 if (fabs(qf_value) > qf_value_bound) 372 for (CeedInt n=0; n<num_nodes; n++) 373 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 374 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 375 } 376 } else { 377 // Diagonal Only 378 const CeedScalar qf_value = 379 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 380 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 381 if (fabs(qf_value) > qf_value_bound) 382 for (CeedInt n=0; n<num_nodes; n++) 383 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 384 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 385 } 386 } 387 } 388 } 389 ierr = 
CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 390 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 391 CeedChk(ierr); 392 393 // Assemble local operator diagonal 394 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 395 assembled, request); CeedChk(ierr); 396 397 // Cleanup 398 if (is_pointblock) { 399 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 400 } 401 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 402 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 403 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 404 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 405 ierr = CeedFree(&identity); CeedChk(ierr); 406 407 return CEED_ERROR_SUCCESS; 408 } 409 410 /** 411 @brief Core logic for assembling composite operator diagonal 412 413 @param[in] op CeedOperator to assemble point block diagonal 414 @param[in] request Address of CeedRequest for non-blocking completion, else 415 CEED_REQUEST_IMMEDIATE 416 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 417 @param[out] assembled CeedVector to store assembled diagonal 418 419 @return An error code: 0 - success, otherwise - failure 420 421 @ref Developer 422 **/ 423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 424 CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 int ierr; 427 CeedInt num_sub; 428 CeedOperator *suboperators; 429 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 430 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 431 for (CeedInt i = 0; i < num_sub; i++) { 432 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 433 is_pointblock, assembled); CeedChk(ierr); 434 } 435 return CEED_ERROR_SUCCESS; 436 } 437 438 /** 439 @brief Build nonzero pattern for non-composite operator 440 441 Users should generally use CeedOperatorLinearAssembleSymbolic() 442 443 @param[in] op CeedOperator to 
assemble nonzero pattern 444 @param[in] offset Offset for number of entries 445 @param[out] rows Row number for each entry 446 @param[out] cols Column number for each entry 447 448 @return An error code: 0 - success, otherwise - failure 449 450 @ref Developer 451 **/ 452 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 453 CeedInt *rows, CeedInt *cols) { 454 int ierr; 455 Ceed ceed = op->ceed; 456 if (op->is_composite) 457 // LCOV_EXCL_START 458 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 459 "Composite operator not supported"); 460 // LCOV_EXCL_STOP 461 462 CeedSize num_nodes; 463 ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr); 464 CeedElemRestriction rstr_in; 465 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 466 CeedInt num_elem, elem_size, num_comp; 467 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 468 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 469 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 470 CeedInt layout_er[3]; 471 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 472 473 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 474 475 // Determine elem_dof relation 476 CeedVector index_vec; 477 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 478 CeedScalar *array; 479 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 480 for (CeedInt i = 0; i < num_nodes; ++i) { 481 array[i] = i; 482 } 483 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 484 CeedVector elem_dof; 485 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 486 CeedChk(ierr); 487 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 488 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 489 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 490 const 
CeedScalar *elem_dof_a; 491 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 492 CeedChk(ierr); 493 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 494 495 // Determine i, j locations for element matrices 496 CeedInt count = 0; 497 for (int e = 0; e < num_elem; ++e) { 498 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 499 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 500 for (int i = 0; i < elem_size; ++i) { 501 for (int j = 0; j < elem_size; ++j) { 502 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 503 (comp_out)*layout_er[1] + e*layout_er[2]; 504 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 505 (comp_in)*layout_er[1] + e*layout_er[2]; 506 507 const CeedInt row = elem_dof_a[elem_dof_index_row]; 508 const CeedInt col = elem_dof_a[elem_dof_index_col]; 509 510 rows[offset + count] = row; 511 cols[offset + count] = col; 512 count++; 513 } 514 } 515 } 516 } 517 } 518 if (count != local_num_entries) 519 // LCOV_EXCL_START 520 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 521 // LCOV_EXCL_STOP 522 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 523 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 524 525 return CEED_ERROR_SUCCESS; 526 } 527 528 /** 529 @brief Assemble nonzero entries for non-composite operator 530 531 Users should generally use CeedOperatorLinearAssemble() 532 533 @param[in] op CeedOperator to assemble 534 @param[out] offset Offest for number of entries 535 @param[out] values Values to assemble into matrix 536 537 @return An error code: 0 - success, otherwise - failure 538 539 @ref Developer 540 **/ 541 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 542 CeedVector values) { 543 int ierr; 544 Ceed ceed = op->ceed; 545 if (op->is_composite) 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 548 "Composite operator not supported"); 549 // LCOV_EXCL_STOP 550 551 // Assemble QFunction 552 
CeedQFunction qf; 553 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 554 CeedVector assembled_qf; 555 CeedElemRestriction rstr_q; 556 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 557 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 558 559 CeedSize qf_length; 560 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 561 562 CeedInt num_input_fields, num_output_fields; 563 CeedOperatorField *input_fields; 564 CeedOperatorField *output_fields; 565 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 566 &num_output_fields, &output_fields); CeedChk(ierr); 567 568 // Determine active input basis 569 CeedQFunctionField *qf_fields; 570 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 571 CeedInt num_eval_mode_in = 0, dim = 1; 572 CeedEvalMode *eval_mode_in = NULL; 573 CeedBasis basis_in = NULL; 574 CeedElemRestriction rstr_in = NULL; 575 for (CeedInt i=0; i<num_input_fields; i++) { 576 CeedVector vec; 577 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 578 if (vec == CEED_VECTOR_ACTIVE) { 579 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 580 CeedChk(ierr); 581 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 582 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 583 CeedChk(ierr); 584 CeedEvalMode eval_mode; 585 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 586 CeedChk(ierr); 587 switch (eval_mode) { 588 case CEED_EVAL_NONE: 589 case CEED_EVAL_INTERP: 590 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 591 eval_mode_in[num_eval_mode_in] = eval_mode; 592 num_eval_mode_in += 1; 593 break; 594 case CEED_EVAL_GRAD: 595 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 596 for (CeedInt d=0; d<dim; d++) { 597 eval_mode_in[num_eval_mode_in+d] = eval_mode; 598 } 599 num_eval_mode_in += dim; 600 break; 601 case CEED_EVAL_WEIGHT: 602 case 
CEED_EVAL_DIV: 603 case CEED_EVAL_CURL: 604 break; // Caught by QF Assembly 605 } 606 } 607 } 608 609 // Determine active output basis 610 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 611 CeedInt num_eval_mode_out = 0; 612 CeedEvalMode *eval_mode_out = NULL; 613 CeedBasis basis_out = NULL; 614 CeedElemRestriction rstr_out = NULL; 615 for (CeedInt i=0; i<num_output_fields; i++) { 616 CeedVector vec; 617 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 618 if (vec == CEED_VECTOR_ACTIVE) { 619 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 620 CeedChk(ierr); 621 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 622 CeedChk(ierr); 623 CeedEvalMode eval_mode; 624 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 625 CeedChk(ierr); 626 switch (eval_mode) { 627 case CEED_EVAL_NONE: 628 case CEED_EVAL_INTERP: 629 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 630 eval_mode_out[num_eval_mode_out] = eval_mode; 631 num_eval_mode_out += 1; 632 break; 633 case CEED_EVAL_GRAD: 634 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 635 for (CeedInt d=0; d<dim; d++) { 636 eval_mode_out[num_eval_mode_out+d] = eval_mode; 637 } 638 num_eval_mode_out += dim; 639 break; 640 case CEED_EVAL_WEIGHT: 641 case CEED_EVAL_DIV: 642 case CEED_EVAL_CURL: 643 break; // Caught by QF Assembly 644 } 645 } 646 } 647 648 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 649 // LCOV_EXCL_START 650 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 651 "Cannot assemble operator with out inputs/outputs"); 652 // LCOV_EXCL_STOP 653 654 CeedInt num_elem, elem_size, num_qpts, num_comp; 655 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 656 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 657 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 658 ierr = 
CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 659 660 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 661 662 // loop over elements and put in data structure 663 const CeedScalar *interp_in, *grad_in; 664 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 665 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 666 667 const CeedScalar *assembled_qf_array; 668 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 669 CeedChk(ierr); 670 671 CeedInt layout_qf[3]; 672 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 673 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 674 675 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 676 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 677 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 678 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 679 num_qpts]; // logically 3-tensor 680 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 681 CeedScalar elem_mat[elem_size * elem_size]; 682 int count = 0; 683 CeedScalar *vals; 684 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 685 for (int e = 0; e < num_elem; ++e) { 686 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 687 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 688 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 689 B_mat_in[ell] = 0.0; 690 } 691 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 692 B_mat_out[ell] = 0.0; 693 } 694 // Store block-diagonal D matrix as collection of small dense blocks 695 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 696 D_mat[ell] = 0.0; 697 } 698 // form element matrix itself (for each block component) 699 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 700 elem_mat[ell] = 0.0; 701 } 702 for (int q = 0; q < num_qpts; 
++q) { 703 for (int n = 0; n < elem_size; ++n) { 704 CeedInt d_in = -1; 705 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 706 const int qq = num_eval_mode_in*q; 707 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 708 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 709 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 710 d_in += 1; 711 B_mat_in[(qq+e_in)*elem_size + n] += 712 grad_in[(d_in*num_qpts+q) * elem_size + n]; 713 } else { 714 // LCOV_EXCL_START 715 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 716 // LCOV_EXCL_STOP 717 } 718 } 719 CeedInt d_out = -1; 720 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 721 const int qq = num_eval_mode_out*q; 722 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 723 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 724 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 725 d_out += 1; 726 B_mat_out[(qq+e_out)*elem_size + n] += 727 grad_in[(d_out*num_qpts+q) * elem_size + n]; 728 } else { 729 // LCOV_EXCL_START 730 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 731 // LCOV_EXCL_STOP 732 } 733 } 734 } 735 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 736 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 737 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 738 +comp_out; 739 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 740 e*layout_qf[2]; 741 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 742 } 743 } 744 } 745 // Compute B^T*D 746 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 747 BTD[ell] = 0.0; 748 } 749 for (int j = 0; j<elem_size; ++j) { 750 for (int q = 0; q<num_qpts; ++q) { 751 int qq = num_eval_mode_out*q; 752 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 753 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 754 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 755 B_mat_out[(qq+ej)*elem_size + j] * 
                D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
              }
            }
          }
        }

        ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
                                  elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);

        // put element matrix in coordinate data structure
        // (row-major element block appended to the COO values array)
        for (int i = 0; i < elem_size; ++i) {
          for (int j = 0; j < elem_size; ++j) {
            vals[offset + count] = elem_mat[i*elem_size + j];
            count++;
          }
        }
      }
    }
  }
  if (count != local_num_entries)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
  // LCOV_EXCL_STOP
  ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);

  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Count number of entries for assembled CeedOperator

  The count is the number of dense per-element blocks:
  (elem_size*num_comp)^2 entries for each of the num_elem elements.

  @param[in] op           CeedOperator to assemble
  @param[out] num_entries Number of entries in assembled representation

  @return An error code: 0 - success, otherwise - failure

  @ref Utility
**/
static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
    CeedInt *num_entries) {
  int ierr;
  CeedElemRestriction rstr;
  CeedInt num_elem, elem_size, num_comp;

  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP
  ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Common code for creating a multigrid coarse operator and level
           transfer operators for a CeedOperator

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] basis_c_to_f  Basis for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  // Fine-grid active restriction, captured while cloning input fields below.
  // NOTE(review): assumes op_fine has an active input field; rstr_fine stays
  // NULL otherwise and the multiplicity setup below would receive NULL — confirm
  // callers guarantee an active input.
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone QFunctionAssemblyData
  ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
         &(*op_coarse)->qf_assembled); CeedChk(ierr);

  // Multiplicity vector
  // Scatter then gather p_mult_fine, then take the reciprocal, producing the
  // inverse node multiplicity used to scale the restriction/prolongation.
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  // Context takes ownership of num_comp_r_data (CEED_OWN_POINTER)
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  // Context takes ownership of num_comp_p_data (CEED_OWN_POINTER)
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup
  // This routine releases the basis_c_to_f reference it was given; the
  // created operators hold their own references via CeedOperatorSetField.
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in] interp_1d    Interpolation matrix in one dimension
  @param[in] grad_1d      Gradient matrix in one dimension
  @param[in] q_weight_1d  Quadrature weights in one dimension
  @param[in] P_1d         Number of basis nodes in one dimension
  @param[in] Q_1d         Number of quadrature points in one dimension
  @param[in] dim          Dimension of basis
  @param[out] mass        Assembled mass matrix in one dimension
  @param[out] laplace     Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass: mass(i,j) = sum_k B(k,i) w(k) B(k,j), stored column-major
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Diagonal shift; NOTE(review): presumably keeps the 1D Laplacian
  // nonsingular for the spectral-equivalence preconditioner — confirm
  // the dimension-dependent magnitudes against callers.
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Backend API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorBackend
/// @{

/**
  @brief Create object holding CeedQFunction assembly data for CeedOperator

  @param[in] ceed   A Ceed object where the CeedQFunctionAssemblyData will be created
  @param[out] data  Address of the variable where the newly created
                      CeedQFunctionAssemblyData will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataCreate(Ceed ceed,
                                    CeedQFunctionAssemblyData *data) {
  int ierr;

  ierr = CeedCalloc(1, data); CeedChk(ierr);
  (*data)->ref_count = 1;
  (*data)->ceed = ceed;
  ierr = CeedReference(ceed); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Increment the reference counter for a CeedQFunctionAssemblyData

  @param data  CeedQFunctionAssemblyData to increment the reference counter

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
  data->ref_count++;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set re-use of
CeedQFunctionAssemblyData

  @param data        CeedQFunctionAssemblyData to mark for reuse
  @param reuse_data  Boolean flag indicating data re-use

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data,
                                      bool reuse_data) {
  data->reuse_data = reuse_data;
  // Newly marked for re-use; force one (re-)assembly before the data is trusted
  data->needs_data_update = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Mark QFunctionAssemblyData as stale

  @param data               CeedQFunctionAssemblyData to mark as stale
  @param needs_data_update  Boolean flag indicating if update is needed or completed

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,
    bool needs_data_update) {
  data->needs_data_update = needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Determine if QFunctionAssemblyData needs update

  @param[in] data              CeedQFunctionAssemblyData to check
  @param[out] is_update_needed Boolean flag indicating if re-assembly is required

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
    bool *is_update_needed) {
  // Data not marked for re-use is always re-assembled
  *is_update_needed = !data->reuse_data || data->needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
           be destroyed with `CeedQFunctionAssemblyDataDestroy()`;
           Note: If `*data_copy` is non-NULL, then it is assumed that
           `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
           CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
           the only reference to this CeedQFunctionAssemblyData.

  @param data            CeedQFunctionAssemblyData to copy reference to
  @param[out] data_copy  Variable to store copied reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
    CeedQFunctionAssemblyData *data_copy) {
  int ierr;

  // Increment before releasing the old reference so self-copy is safe
  ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
  ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
  *data_copy = data;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get setup status for internal objects for CeedQFunctionAssemblyData

  @param[in] data      CeedQFunctionAssemblyData to retrieve status
  @param[out] is_setup Boolean flag for setup status

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
                                     bool *is_setup) {
  *is_setup = data->is_setup;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set internal objects for CeedQFunctionAssemblyData

  @param[in] data  CeedQFunctionAssemblyData to set objects
  @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
  @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector vec, CeedElemRestriction rstr) {
  int ierr;

  ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);

  data->is_setup = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get internal objects for CeedQFunctionAssemblyData. Copied references
           are returned; the caller is responsible for destroying them.
           Requires a prior call to `CeedQFunctionAssemblyDataSetObjects()`.

  @param[in] data   CeedQFunctionAssemblyData to retrieve objects from
  @param[out] vec   Variable to store assembled CeedVector reference
  @param[out] rstr  Variable to store CeedElemRestriction reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector *vec, CeedElemRestriction *rstr) {
  int ierr;

  if (!data->is_setup)
    // LCOV_EXCL_START
    return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
                     "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
  // LCOV_EXCL_STOP

  ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Destroy CeedQFunctionAssemblyData

  @param[out] data  CeedQFunctionAssemblyData to destroy

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
  int ierr;

  // Only free once the last reference is released
  if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;

  ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
  ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);

  ierr = CeedFree(data); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
    For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
    'v', provided in that order, would result in an assembled QFunction that
    consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
    on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedQFunction at
                           quadrature points
  @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
                           CeedQFunction
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
                                        CeedElemRestriction *rstr,
                                        CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunction) {
    ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
    CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
           rstr, request); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble CeedQFunction and store result internally. Return copied
           references of stored data to the caller. Caller is responsible for
           ownership and destruction of the copied references. See also
           @ref CeedOperatorLinearAssembleQFunction

  @param op         CeedOperator to assemble CeedQFunction
  @param assembled  CeedVector to store assembled CeedQFunction at
                      quadrature points
  @param rstr       CeedElemRestriction for CeedVector containing assembled
                      CeedQFunction
  @param request    Address of CeedRequest for non-blocking completion, else
                      @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    bool qf_assembled_is_setup;
    CeedVector assembled_vec = NULL;
    CeedElemRestriction assembled_rstr = NULL;

    ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
                                            &qf_assembled_is_setup); CeedChk(ierr);
    if (qf_assembled_is_setup) {
      // Re-use previously assembled data, updating in place only if stale
      ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
             &assembled_rstr); CeedChk(ierr);

      bool update_needed;
      ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
             &update_needed); CeedChk(ierr);
      if (update_needed) {
        ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
               request); CeedChk(ierr);
      }
    } else {
      // First assembly; NOTE(review): assumes a backend providing
      // LinearAssembleQFunctionUpdate also provides LinearAssembleQFunction —
      // confirm against the backend contract
      ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
             request); CeedChk(ierr);
      ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
             assembled_rstr); CeedChk(ierr);
    }
    ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
    CeedChk(ierr);

    // Copy reference to internally held copy
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
    ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
    ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Backend provides only the additive variant; zero first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
                                          CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  // (third argument 'false' selects scalar diagonal, not point block)
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Backend provides only the additive variant; zero first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
const char *resource, *fallback_resource; 1604 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1605 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1606 CeedChk(ierr); 1607 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1608 // Fallback to reference Ceed 1609 if (!op->op_fallback) { 1610 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1611 } 1612 // Assemble 1613 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1614 assembled, request); CeedChk(ierr); 1615 return CEED_ERROR_SUCCESS; 1616 } 1617 } 1618 1619 // Default interface implemenation 1620 bool is_composite; 1621 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1622 if (is_composite) { 1623 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1624 true, assembled); CeedChk(ierr); 1625 return CEED_ERROR_SUCCESS; 1626 } else { 1627 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1628 CeedChk(ierr); 1629 return CEED_ERROR_SUCCESS; 1630 } 1631 } 1632 1633 /** 1634 @brief Fully assemble the nonzero pattern of a linear operator. 1635 1636 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1637 1638 The assembly routines use coordinate format, with num_entries tuples of the 1639 form (i, j, value) which indicate that value should be added to the matrix 1640 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1641 This function returns the number of entries and their (i, j) locations, 1642 while CeedOperatorLinearAssemble() provides the values in the same 1643 ordering. 1644 1645 This will generally be slow unless your operator is low-order. 1646 1647 Note: Calling this function asserts that setup is complete 1648 and sets the CeedOperator as immutable. 

  @param[in]  op           CeedOperator to assemble
  @param[out] num_entries  Number of entries in coordinate nonzero pattern
  @param[out] rows         Row number for each entry
  @param[out] cols         Column number for each entry

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  // NOTE(review): *rows and *cols are allocated here with CeedCalloc;
  // presumably the caller owns and frees them -- confirm against callers
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    // Total entry count is the sum over all sub-operators
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      // Each sub-operator writes its (i, j) pairs after the previous one's
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.
1744 1745 @param[in] op CeedOperator to assemble 1746 @param[out] values Values to assemble into matrix 1747 1748 @ref User 1749 **/ 1750 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1751 int ierr; 1752 CeedInt num_suboperators, single_entries = 0; 1753 CeedOperator *sub_operators; 1754 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1755 1756 // Use backend version, if available 1757 if (op->LinearAssemble) { 1758 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1759 return CEED_ERROR_SUCCESS; 1760 } else { 1761 // Check for valid fallback resource 1762 const char *resource, *fallback_resource; 1763 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1764 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1765 CeedChk(ierr); 1766 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1767 // Fallback to reference Ceed 1768 if (!op->op_fallback) { 1769 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1770 } 1771 // Assemble 1772 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1773 return CEED_ERROR_SUCCESS; 1774 } 1775 } 1776 1777 // Default interface implementation 1778 bool is_composite; 1779 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1780 1781 CeedInt offset = 0; 1782 if (is_composite) { 1783 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1784 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1785 for (int k = 0; k < num_suboperators; ++k) { 1786 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1787 CeedChk(ierr); 1788 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1789 &single_entries); 1790 CeedChk(ierr); 1791 offset += single_entries; 1792 } 1793 } else { 1794 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1795 } 1796 1797 return CEED_ERROR_SUCCESS; 1798 } 1799 1800 /** 1801 @brief Create a multigrid coarse operator and level 
 transfer operators
    for a CeedOperator, creating the prolongation basis from the
    fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in]  op_fine       Fine grid operator
  @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in]  rstr_coarse   Coarse grid restriction
  @param[in]  basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse     Coarse grid operator
  @param[out] op_prolong    Coarse to fine operator
  @param[out] op_restrict   Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    // Tensor bases: work with the 1D factors only
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Working copies of the interpolation matrices (QR below is in-place)
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  // Back substitution against the upper-triangular R stored in interp_f
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
    for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.

  @param[in]  op_fine        Fine grid operator
  @param[in]  p_mult_fine    L-vector multiplicity in parallel gather/scatter
  @param[in]  rstr_coarse    Coarse grid restriction
  @param[in]  basis_coarse   Coarse grid active vector basis
  @param[in]  interp_c_to_f  Matrix for coarse to fine interpolation
  @param[out] op_coarse      Coarse grid operator
  @param[out] op_prolong     Coarse to fine operator
  @param[out] op_restrict    Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
    CeedOperator *op_prolong, CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  // Recover the 1D coarse node count from the element size.
  // NOTE(review): double->CeedInt conversion truncates; assumes num_nodes_c is
  // an exact square/cube so sqrt/cbrt land on (or above) an integer -- confirm
  P_1d_c = dim == 1 ? num_nodes_c :
           dim == 2 ? sqrt(num_nodes_c) :
           cbrt(num_nodes_c);
  // Zero-filled reference coords, weights, and gradient for the transfer basis
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
                                 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
    for a CeedOperator with a non-tensor basis for the active vector

  Note: Calling this function asserts that setup is complete
    and sets the CeedOperator as immutable.
1989 1990 @param[in] op_fine Fine grid operator 1991 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1992 @param[in] rstr_coarse Coarse grid restriction 1993 @param[in] basis_coarse Coarse grid active vector basis 1994 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1995 @param[out] op_coarse Coarse grid operator 1996 @param[out] op_prolong Coarse to fine operator 1997 @param[out] op_restrict Fine to coarse operator 1998 1999 @return An error code: 0 - success, otherwise - failure 2000 2001 @ref User 2002 **/ 2003 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 2004 CeedVector p_mult_fine, 2005 CeedElemRestriction rstr_coarse, 2006 CeedBasis basis_coarse, 2007 const CeedScalar *interp_c_to_f, 2008 CeedOperator *op_coarse, 2009 CeedOperator *op_prolong, 2010 CeedOperator *op_restrict) { 2011 int ierr; 2012 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2013 Ceed ceed; 2014 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2015 2016 // Check for compatible quadrature spaces 2017 CeedBasis basis_fine; 2018 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2019 CeedInt Q_f, Q_c; 2020 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2021 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2022 if (Q_f != Q_c) 2023 // LCOV_EXCL_START 2024 return CeedError(ceed, CEED_ERROR_DIMENSION, 2025 "Bases must have compatible quadrature spaces"); 2026 // LCOV_EXCL_STOP 2027 2028 // Coarse to fine basis 2029 CeedElemTopology topo; 2030 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 2031 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2032 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2033 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2034 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 2035 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2036 
CeedChk(ierr); 2037 CeedScalar *q_ref, *q_weight, *grad; 2038 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 2039 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 2040 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 2041 CeedBasis basis_c_to_f; 2042 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 2043 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2044 CeedChk(ierr); 2045 ierr = CeedFree(&q_ref); CeedChk(ierr); 2046 ierr = CeedFree(&q_weight); CeedChk(ierr); 2047 ierr = CeedFree(&grad); CeedChk(ierr); 2048 2049 // Core code 2050 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2051 basis_coarse, basis_c_to_f, op_coarse, 2052 op_prolong, op_restrict); 2053 CeedChk(ierr); 2054 return CEED_ERROR_SUCCESS; 2055 } 2056 2057 /** 2058 @brief Build a FDM based approximate inverse for each element for a 2059 CeedOperator 2060 2061 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2062 Method based approximate inverse. This function obtains the simultaneous 2063 diagonalization for the 1D mass and Laplacian operators, 2064 M = V^T V, K = V^T S V. 2065 The assembled QFunction is used to modify the eigenvalues from simultaneous 2066 diagonalization and obtain an approximate inverse of the form 2067 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2068 associated CeedQFunction must therefore also be linear. 2069 2070 Note: Calling this function asserts that setup is complete 2071 and sets the CeedOperator as immutable. 
2072 2073 @param op CeedOperator to create element inverses 2074 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2075 for each element 2076 @param request Address of CeedRequest for non-blocking completion, else 2077 @ref CEED_REQUEST_IMMEDIATE 2078 2079 @return An error code: 0 - success, otherwise - failure 2080 2081 @ref User 2082 **/ 2083 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2084 CeedRequest *request) { 2085 int ierr; 2086 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2087 2088 // Use backend version, if available 2089 if (op->CreateFDMElementInverse) { 2090 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2091 return CEED_ERROR_SUCCESS; 2092 } else { 2093 // Check for valid fallback resource 2094 const char *resource, *fallback_resource; 2095 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2096 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2097 CeedChk(ierr); 2098 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2099 // Fallback to reference Ceed 2100 if (!op->op_fallback) { 2101 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2102 } 2103 // Assemble 2104 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2105 CeedChk(ierr); 2106 return CEED_ERROR_SUCCESS; 2107 } 2108 } 2109 2110 // Interface implementation 2111 Ceed ceed, ceed_parent; 2112 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2113 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2114 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2115 CeedQFunction qf; 2116 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2117 2118 // Determine active input basis 2119 bool interp = false, grad = false; 2120 CeedBasis basis = NULL; 2121 CeedElemRestriction rstr = NULL; 2122 CeedOperatorField *op_fields; 2123 CeedQFunctionField *qf_fields; 2124 CeedInt num_input_fields; 2125 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2126 CeedChk(ierr); 2127 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2128 for (CeedInt i=0; i<num_input_fields; i++) { 2129 CeedVector vec; 2130 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2131 if (vec == CEED_VECTOR_ACTIVE) { 2132 CeedEvalMode eval_mode; 2133 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2134 interp = interp || eval_mode == CEED_EVAL_INTERP; 2135 grad = grad || eval_mode == CEED_EVAL_GRAD; 2136 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2137 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2138 } 2139 } 2140 if (!basis) 2141 // LCOV_EXCL_START 2142 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2143 // LCOV_EXCL_STOP 2144 CeedSize l_size = 1; 2145 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2146 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2147 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2148 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2149 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2150 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2151 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2152 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2153 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2154 2155 // Build and diagonalize 1D Mass and Laplacian 2156 
bool tensor_basis; 2157 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2158 if (!tensor_basis) 2159 // LCOV_EXCL_START 2160 return CeedError(ceed, CEED_ERROR_BACKEND, 2161 "FDMElementInverse only supported for tensor " 2162 "bases"); 2163 // LCOV_EXCL_STOP 2164 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2165 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2166 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2167 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2168 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2169 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2170 // -- Build matrices 2171 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2172 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2173 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2174 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2175 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2176 mass, laplace); CeedChk(ierr); 2177 2178 // -- Diagonalize 2179 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2180 CeedChk(ierr); 2181 ierr = CeedFree(&mass); CeedChk(ierr); 2182 ierr = CeedFree(&laplace); CeedChk(ierr); 2183 for (CeedInt i=0; i<P_1d; i++) 2184 for (CeedInt j=0; j<P_1d; j++) 2185 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2186 ierr = CeedFree(&x); CeedChk(ierr); 2187 2188 // Assemble QFunction 2189 CeedVector assembled; 2190 CeedElemRestriction rstr_qf; 2191 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2192 &rstr_qf, request); CeedChk(ierr); 2193 CeedInt layout[3]; 2194 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2195 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2196 CeedScalar max_norm = 0; 2197 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2198 2199 // Calculate element averages 2200 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2201 CeedScalar 
*elem_avg; 2202 const CeedScalar *assembled_array, *q_weight_array; 2203 CeedVector q_weight; 2204 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2205 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2206 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2207 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2208 CeedChk(ierr); 2209 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2210 CeedChk(ierr); 2211 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2212 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2213 for (CeedInt e=0; e<num_elem; e++) { 2214 CeedInt count = 0; 2215 for (CeedInt q=0; q<num_qpts; q++) 2216 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2217 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2218 qf_value_bound) { 2219 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2220 q_weight_array[q]; 2221 count++; 2222 } 2223 if (count) { 2224 elem_avg[e] /= count; 2225 } else { 2226 elem_avg[e] = 1.0; 2227 } 2228 } 2229 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2230 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2231 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2232 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2233 2234 // Build FDM diagonal 2235 CeedVector q_data; 2236 CeedScalar *q_data_array, *fdm_diagonal; 2237 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2238 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2239 for (CeedInt c=0; c<num_comp; c++) 2240 for (CeedInt n=0; n<elem_size; n++) { 2241 if (interp) 2242 fdm_diagonal[c*elem_size + n] = 1.0; 2243 if (grad) 2244 for (CeedInt d=0; d<dim; d++) { 2245 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2246 fdm_diagonal[c*elem_size + n] += lambda[i]; 2247 } 2248 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2249 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2250 } 2251 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2252 CeedChk(ierr); 2253 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2254 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2255 CeedChk(ierr); 2256 for (CeedInt e=0; e<num_elem; e++) 2257 for (CeedInt c=0; c<num_comp; c++) 2258 for (CeedInt n=0; n<elem_size; n++) 2259 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2260 fdm_diagonal[c*elem_size + n]); 2261 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2262 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2263 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2264 2265 // Setup FDM operator 2266 // -- Basis 2267 CeedBasis fdm_basis; 2268 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2269 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2270 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2271 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2272 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2273 fdm_interp, grad_dummy, q_ref_dummy, 2274 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2275 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2276 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2277 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2278 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2279 ierr = CeedFree(&lambda); CeedChk(ierr); 2280 2281 // -- Restriction 2282 CeedElemRestriction rstr_qd_i; 2283 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2284 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2285 num_comp, num_elem*num_comp*elem_size, 2286 strides, &rstr_qd_i); CeedChk(ierr); 2287 // -- QFunction 2288 CeedQFunction qf_fdm; 2289 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2290 CeedChk(ierr); 2291 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2292 
CeedChk(ierr); 2293 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2294 CeedChk(ierr); 2295 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2296 CeedChk(ierr); 2297 // -- QFunction context 2298 CeedInt *num_comp_data; 2299 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2300 num_comp_data[0] = num_comp; 2301 CeedQFunctionContext ctx_fdm; 2302 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2303 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2304 sizeof(*num_comp_data), num_comp_data); 2305 CeedChk(ierr); 2306 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2307 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2308 // -- Operator 2309 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2310 CeedChk(ierr); 2311 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2312 CeedChk(ierr); 2313 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2314 q_data); CeedChk(ierr); 2315 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2316 CeedChk(ierr); 2317 2318 // Cleanup 2319 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2320 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2321 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2322 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2323 2324 return CEED_ERROR_SUCCESS; 2325 } 2326 2327 /// @} 2328