1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedOperator preconditioning interfaces 27 28 /// ---------------------------------------------------------------------------- 29 /// CeedOperator Library Internal Preconditioning Functions 30 /// ---------------------------------------------------------------------------- 31 /// @addtogroup CeedOperatorDeveloper 32 /// @{ 33 34 /** 35 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 36 CeedOperator functionality 37 38 @param op CeedOperator to create fallback for 39 40 @return An error code: 0 - success, otherwise - failure 41 42 @ref Developer 43 **/ 44 int CeedOperatorCreateFallback(CeedOperator op) { 45 int ierr; 46 47 // Fallback Ceed 48 const char *resource, *fallback_resource; 49 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 50 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 51 CeedChk(ierr); 52 if (!strcmp(resource, fallback_resource)) 53 // LCOV_EXCL_START 54 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 55 "Backend %s cannot create an operator" 56 "fallback to resource %s", resource, fallback_resource); 57 // LCOV_EXCL_STOP 58 59 // Fallback Ceed 60 Ceed ceed_ref; 61 if (!op->ceed->op_fallback_ceed) { 62 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 63 ceed_ref->op_fallback_parent = op->ceed; 64 ceed_ref->Error = op->ceed->Error; 65 op->ceed->op_fallback_ceed = ceed_ref; 66 } 67 ceed_ref = op->ceed->op_fallback_ceed; 68 69 // Clone Op 70 CeedOperator op_ref; 71 ierr = CeedCalloc(1, &op_ref); CeedChk(ierr); 72 memcpy(op_ref, op, sizeof(*op_ref)); 73 op_ref->data = NULL; 74 op_ref->is_interface_setup = false; 75 op_ref->is_backend_setup = false; 76 op_ref->ceed = ceed_ref; 77 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 78 ierr = 
CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 79 &op_ref->qf_assembled); CeedChk(ierr); 80 op->op_fallback = op_ref; 81 82 // Clone QF 83 CeedQFunction qf_ref; 84 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 85 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 86 qf_ref->data = NULL; 87 qf_ref->ceed = ceed_ref; 88 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 89 op_ref->qf = qf_ref; 90 op->qf_fallback = qf_ref; 91 return CEED_ERROR_SUCCESS; 92 } 93 94 /** 95 @brief Select correct basis matrix pointer based on CeedEvalMode 96 97 @param[in] eval_mode Current basis evaluation mode 98 @param[in] identity Pointer to identity matrix 99 @param[in] interp Pointer to interpolation matrix 100 @param[in] grad Pointer to gradient matrix 101 @param[out] basis_ptr Basis pointer to set 102 103 @ref Developer 104 **/ 105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 106 const CeedScalar *identity, const CeedScalar *interp, 107 const CeedScalar *grad, const CeedScalar **basis_ptr) { 108 switch (eval_mode) { 109 case CEED_EVAL_NONE: 110 *basis_ptr = identity; 111 break; 112 case CEED_EVAL_INTERP: 113 *basis_ptr = interp; 114 break; 115 case CEED_EVAL_GRAD: 116 *basis_ptr = grad; 117 break; 118 case CEED_EVAL_WEIGHT: 119 case CEED_EVAL_DIV: 120 case CEED_EVAL_CURL: 121 break; // Caught by QF Assembly 122 } 123 } 124 125 /** 126 @brief Create point block restriction for active operator field 127 128 @param[in] rstr Original CeedElemRestriction for active field 129 @param[out] pointblock_rstr Address of the variable where the newly created 130 CeedElemRestriction will be stored 131 132 @return An error code: 0 - success, otherwise - failure 133 134 @ref Developer 135 **/ 136 static int CeedOperatorCreateActivePointBlockRestriction( 137 CeedElemRestriction rstr, 138 CeedElemRestriction *pointblock_rstr) { 139 int ierr; 140 Ceed ceed; 141 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 142 const CeedInt *offsets; 143 ierr = 
/**
  @brief Create point block restriction for active operator field

  @param[in]  rstr             Original CeedElemRestriction for active field
  @param[out] pointblock_rstr  Address of the variable where the newly created
                                 CeedElemRestriction will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedOperatorCreateActivePointBlockRestriction(
  CeedElemRestriction rstr,
  CeedElemRestriction *pointblock_rstr) {
  int ierr;
  Ceed ceed;
  ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
  const CeedInt *offsets;
  ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
  CeedChk(ierr);

  // Expand offsets: each original node offset becomes the base index of a
  //   num_comp x num_comp dense block in the point block L-vector
  CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
          *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // Scaling factor from node offset to block offset; when the original
  //   restriction is not interlaced (comp_stride != 1) an extra factor of
  //   num_comp is needed -- NOTE(review): assumes offsets are already scaled
  //   by num_comp in the interlaced case; confirm against restriction docs
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  // Track the largest expanded offset to size the new L-vector
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction with num_comp*num_comp components (one per block
  //   entry), unit component stride, and L-vector sized to fit the last block
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
  CeedChk(ierr);

  // Cleanup; pointblock_offsets ownership was transferred via CEED_OWN_POINTER
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  @param[in]  op             CeedOperator to assemble point block diagonal
  @param[in]  request        Address of CeedRequest for non-blocking completion, else
                               CEED_REQUEST_IMMEDIATE
  @param[in]  is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled      CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction: obtain the pointwise operator D at quadrature points
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf,
         &rstr, request); CeedChk(ierr);
  // E-vector layout of the assembled QFunction data; restriction itself is
  //   no longer needed once the layout is known
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm is used below to skip numerically negligible QF values
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis: collect one eval mode entry per scalar
  //   input block (GRAD contributes dim entries)
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction for this assembly path
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // One entry per topological dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis: mirror of the input scan above
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector (element-level work vector)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array;
  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices; an explicit (rectangular) identity is only built when
  //   some field uses CEED_EVAL_NONE
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    // Ones on the main diagonal up to min(num_nodes, num_qpts)
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Values within roundoff of zero (relative to the max norm) are skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // d_out tracks which derivative slice of grad_out this entry uses
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal: full num_comp x num_comp block per node
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only: same component in and out (c_out twice)
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                  num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);

  // Assemble local operator diagonal: scatter element diagonals into the
  //   caller's L-vector (additive, via transpose restriction)
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup; diag_rstr is only owned here in the point block case
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 390 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 391 CeedChk(ierr); 392 393 // Assemble local operator diagonal 394 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 395 assembled, request); CeedChk(ierr); 396 397 // Cleanup 398 if (is_pointblock) { 399 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 400 } 401 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 402 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 403 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 404 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 405 ierr = CeedFree(&identity); CeedChk(ierr); 406 407 return CEED_ERROR_SUCCESS; 408 } 409 410 /** 411 @brief Core logic for assembling composite operator diagonal 412 413 @param[in] op CeedOperator to assemble point block diagonal 414 @param[in] request Address of CeedRequest for non-blocking completion, else 415 CEED_REQUEST_IMMEDIATE 416 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 417 @param[out] assembled CeedVector to store assembled diagonal 418 419 @return An error code: 0 - success, otherwise - failure 420 421 @ref Developer 422 **/ 423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 424 CeedOperator op, CeedRequest *request, const bool is_pointblock, 425 CeedVector assembled) { 426 int ierr; 427 CeedInt num_sub; 428 CeedOperator *suboperators; 429 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 430 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 431 for (CeedInt i = 0; i < num_sub; i++) { 432 ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 433 is_pointblock, assembled); CeedChk(ierr); 434 } 435 return CEED_ERROR_SUCCESS; 436 } 437 438 /** 439 @brief Build nonzero pattern for non-composite operator 440 441 Users should generally use CeedOperatorLinearAssembleSymbolic() 442 443 @param[in] op CeedOperator to 
assemble nonzero pattern 444 @param[in] offset Offset for number of entries 445 @param[out] rows Row number for each entry 446 @param[out] cols Column number for each entry 447 448 @return An error code: 0 - success, otherwise - failure 449 450 @ref Developer 451 **/ 452 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 453 CeedInt *rows, CeedInt *cols) { 454 int ierr; 455 Ceed ceed = op->ceed; 456 if (op->is_composite) 457 // LCOV_EXCL_START 458 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 459 "Composite operator not supported"); 460 // LCOV_EXCL_STOP 461 462 CeedElemRestriction rstr_in; 463 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 464 CeedSize num_nodes; 465 CeedInt num_elem, elem_size, num_comp; 466 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 467 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 468 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 469 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 470 CeedInt layout_er[3]; 471 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 472 473 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 474 475 // Determine elem_dof relation 476 CeedVector index_vec; 477 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 478 CeedScalar *array; 479 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 480 for (CeedInt i = 0; i < num_nodes; ++i) { 481 array[i] = i; 482 } 483 ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr); 484 CeedVector elem_dof; 485 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 486 CeedChk(ierr); 487 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 488 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 489 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 490 const 
CeedScalar *elem_dof_a; 491 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 492 CeedChk(ierr); 493 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 494 495 // Determine i, j locations for element matrices 496 CeedInt count = 0; 497 for (int e = 0; e < num_elem; ++e) { 498 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 499 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 500 for (int i = 0; i < elem_size; ++i) { 501 for (int j = 0; j < elem_size; ++j) { 502 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 503 (comp_out)*layout_er[1] + e*layout_er[2]; 504 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 505 (comp_in)*layout_er[1] + e*layout_er[2]; 506 507 const CeedInt row = elem_dof_a[elem_dof_index_row]; 508 const CeedInt col = elem_dof_a[elem_dof_index_col]; 509 510 rows[offset + count] = row; 511 cols[offset + count] = col; 512 count++; 513 } 514 } 515 } 516 } 517 } 518 if (count != local_num_entries) 519 // LCOV_EXCL_START 520 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 521 // LCOV_EXCL_STOP 522 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 523 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 524 525 return CEED_ERROR_SUCCESS; 526 } 527 528 /** 529 @brief Assemble nonzero entries for non-composite operator 530 531 Users should generally use CeedOperatorLinearAssemble() 532 533 @param[in] op CeedOperator to assemble 534 @param[out] offset Offest for number of entries 535 @param[out] values Values to assemble into matrix 536 537 @return An error code: 0 - success, otherwise - failure 538 539 @ref Developer 540 **/ 541 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 542 CeedVector values) { 543 int ierr; 544 Ceed ceed = op->ceed; 545 if (op->is_composite) 546 // LCOV_EXCL_START 547 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 548 "Composite operator not supported"); 549 // LCOV_EXCL_STOP 550 551 // Assemble QFunction 552 
CeedQFunction qf; 553 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 554 CeedVector assembled_qf; 555 CeedElemRestriction rstr_q; 556 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 557 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 558 559 CeedSize qf_length; 560 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 561 562 CeedInt num_input_fields, num_output_fields; 563 CeedOperatorField *input_fields; 564 CeedOperatorField *output_fields; 565 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 566 &num_output_fields, &output_fields); CeedChk(ierr); 567 568 // Determine active input basis 569 CeedQFunctionField *qf_fields; 570 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 571 CeedInt num_eval_mode_in = 0, dim = 1; 572 CeedEvalMode *eval_mode_in = NULL; 573 CeedBasis basis_in = NULL; 574 CeedElemRestriction rstr_in = NULL; 575 for (CeedInt i=0; i<num_input_fields; i++) { 576 CeedVector vec; 577 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 578 if (vec == CEED_VECTOR_ACTIVE) { 579 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 580 CeedChk(ierr); 581 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 582 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 583 CeedChk(ierr); 584 CeedEvalMode eval_mode; 585 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 586 CeedChk(ierr); 587 switch (eval_mode) { 588 case CEED_EVAL_NONE: 589 case CEED_EVAL_INTERP: 590 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 591 eval_mode_in[num_eval_mode_in] = eval_mode; 592 num_eval_mode_in += 1; 593 break; 594 case CEED_EVAL_GRAD: 595 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 596 for (CeedInt d=0; d<dim; d++) { 597 eval_mode_in[num_eval_mode_in+d] = eval_mode; 598 } 599 num_eval_mode_in += dim; 600 break; 601 case CEED_EVAL_WEIGHT: 602 case 
CEED_EVAL_DIV: 603 case CEED_EVAL_CURL: 604 break; // Caught by QF Assembly 605 } 606 } 607 } 608 609 // Determine active output basis 610 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 611 CeedInt num_eval_mode_out = 0; 612 CeedEvalMode *eval_mode_out = NULL; 613 CeedBasis basis_out = NULL; 614 CeedElemRestriction rstr_out = NULL; 615 for (CeedInt i=0; i<num_output_fields; i++) { 616 CeedVector vec; 617 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 618 if (vec == CEED_VECTOR_ACTIVE) { 619 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 620 CeedChk(ierr); 621 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 622 CeedChk(ierr); 623 CeedEvalMode eval_mode; 624 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 625 CeedChk(ierr); 626 switch (eval_mode) { 627 case CEED_EVAL_NONE: 628 case CEED_EVAL_INTERP: 629 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 630 eval_mode_out[num_eval_mode_out] = eval_mode; 631 num_eval_mode_out += 1; 632 break; 633 case CEED_EVAL_GRAD: 634 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 635 for (CeedInt d=0; d<dim; d++) { 636 eval_mode_out[num_eval_mode_out+d] = eval_mode; 637 } 638 num_eval_mode_out += dim; 639 break; 640 case CEED_EVAL_WEIGHT: 641 case CEED_EVAL_DIV: 642 case CEED_EVAL_CURL: 643 break; // Caught by QF Assembly 644 } 645 } 646 } 647 648 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 649 // LCOV_EXCL_START 650 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 651 "Cannot assemble operator with out inputs/outputs"); 652 // LCOV_EXCL_STOP 653 654 CeedInt num_elem, elem_size, num_qpts, num_comp; 655 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 656 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 657 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 658 ierr = 
CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 659 660 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 661 662 // loop over elements and put in data structure 663 const CeedScalar *interp_in, *grad_in; 664 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 665 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 666 667 const CeedScalar *assembled_qf_array; 668 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 669 CeedChk(ierr); 670 671 CeedInt layout_qf[3]; 672 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 673 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 674 675 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 676 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 677 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 678 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 679 num_qpts]; // logically 3-tensor 680 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 681 CeedScalar elem_mat[elem_size * elem_size]; 682 int count = 0; 683 CeedScalar *vals; 684 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 685 for (int e = 0; e < num_elem; ++e) { 686 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 687 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 688 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 689 B_mat_in[ell] = 0.0; 690 } 691 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 692 B_mat_out[ell] = 0.0; 693 } 694 // Store block-diagonal D matrix as collection of small dense blocks 695 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 696 D_mat[ell] = 0.0; 697 } 698 // form element matrix itself (for each block component) 699 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 700 elem_mat[ell] = 0.0; 701 } 702 for (int q = 0; q < num_qpts; 
++q) { 703 for (int n = 0; n < elem_size; ++n) { 704 CeedInt d_in = -1; 705 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 706 const int qq = num_eval_mode_in*q; 707 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 708 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 709 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 710 d_in += 1; 711 B_mat_in[(qq+e_in)*elem_size + n] += 712 grad_in[(d_in*num_qpts+q) * elem_size + n]; 713 } else { 714 // LCOV_EXCL_START 715 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 716 // LCOV_EXCL_STOP 717 } 718 } 719 CeedInt d_out = -1; 720 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 721 const int qq = num_eval_mode_out*q; 722 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 723 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 724 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 725 d_out += 1; 726 B_mat_out[(qq+e_out)*elem_size + n] += 727 grad_in[(d_out*num_qpts+q) * elem_size + n]; 728 } else { 729 // LCOV_EXCL_START 730 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 731 // LCOV_EXCL_STOP 732 } 733 } 734 } 735 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 736 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 737 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 738 +comp_out; 739 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 740 e*layout_qf[2]; 741 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 742 } 743 } 744 } 745 // Compute B^T*D 746 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 747 BTD[ell] = 0.0; 748 } 749 for (int j = 0; j<elem_size; ++j) { 750 for (int q = 0; q<num_qpts; ++q) { 751 int qq = num_eval_mode_out*q; 752 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 753 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 754 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 755 B_mat_out[(qq+ej)*elem_size + j] * 
            D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
          }
        }
      }
    }

    // Apply B^T (D B) to form the dense element matrix
    ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
                              elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);

    // put element matrix in coordinate data structure
    for (int i = 0; i < elem_size; ++i) {
      for (int j = 0; j < elem_size; ++j) {
        vals[offset + count] = elem_mat[i*elem_size + j];
        count++;
      }
    }
    }
    }
  }
  if (count != local_num_entries)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
  // LCOV_EXCL_STOP
  ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);

  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Count number of entries for assembled CeedOperator

  @param[in]  op          CeedOperator to assemble
  @param[out] num_entries Number of entries in assembled representation

  @return An error code: 0 - success, otherwise - failure

  @ref Utility
**/
static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
    CeedInt *num_entries) {
  int ierr;
  CeedElemRestriction rstr;
  CeedInt num_elem, elem_size, num_comp;

  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP
  ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  // One dense (elem_size*num_comp)^2 block per element
  *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Common code for creating a multigrid coarse operator and level
           transfer operators for a CeedOperator

  @param[in]  op_fine      Fine grid operator
  @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
  @param[in]  rstr_coarse  Coarse grid restriction
  @param[in]  basis_coarse Coarse grid active vector basis
  @param[in]  basis_c_to_f Basis for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields, swapping the active field to the coarse basis/restriction
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone QFunctionAssemblyData
  ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
         &(*op_coarse)->qf_assembled); CeedChk(ierr);

  // Multiplicity vector: gather/scatter of the fine L-vector multiplicity,
  // then reciprocal so "scale" undoes the duplication of shared dofs
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  // Context takes ownership of num_comp_r_data (CEED_OWN_POINTER)
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in]  interp_1d   Interpolation matrix in one dimension
  @param[in]  grad_1d     Gradient matrix in one dimension
  @param[in]  q_weight_1d Quadrature weights in one dimension
  @param[in]  P_1d        Number of basis nodes in one dimension
  @param[in]  Q_1d        Number of quadrature points in one dimension
  @param[in]  dim         Dimension of basis
  @param[out] mass        Assembled mass matrix in one dimension
  @param[out] laplace     Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass: B^T W B with the 1D interpolation matrix
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian: G^T W G with the 1D gradient matrix
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Diagonal perturbation keeps the (singular) Laplacian invertible
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Backend API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorBackend
/// @{

/**
  @brief Create object holding CeedQFunction assembly data for CeedOperator

  @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
  @param[out] data Address of the variable where the newly created
                     CeedQFunctionAssemblyData will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataCreate(Ceed ceed,
                                    CeedQFunctionAssemblyData *data) {
  int ierr;

  ierr = CeedCalloc(1, data); CeedChk(ierr);
  (*data)->ref_count = 1;
  (*data)->ceed = ceed;
  // Hold a reference to the Ceed for the lifetime of the data object
  ierr = CeedReference(ceed); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Increment the reference counter for a CeedQFunctionAssemblyData

  @param data CeedQFunctionAssemblyData to increment the reference counter

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
  data->ref_count++;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set re-use of
CeedQFunctionAssemblyData 1069 1070 @param data CeedQFunctionAssemblyData to mark for reuse 1071 @param reuse_data Boolean flag indicating data re-use 1072 1073 @return An error code: 0 - success, otherwise - failure 1074 1075 @ref Backend 1076 **/ 1077 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, 1078 bool reuse_data) { 1079 data->reuse_data = reuse_data; 1080 data->needs_data_update = true; 1081 return CEED_ERROR_SUCCESS; 1082 } 1083 1084 /** 1085 @brief Mark QFunctionAssemblyData as stale 1086 1087 @param data CeedQFunctionAssemblyData to mark as stale 1088 @param needs_data_update Boolean flag indicating if update is needed or completed 1089 1090 @return An error code: 0 - success, otherwise - failure 1091 1092 @ref Backend 1093 **/ 1094 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, 1095 bool needs_data_update) { 1096 data->needs_data_update = needs_data_update; 1097 return CEED_ERROR_SUCCESS; 1098 } 1099 1100 /** 1101 @brief Determine if QFunctionAssemblyData needs update 1102 1103 @param[in] data CeedQFunctionAssemblyData to mark as stale 1104 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1105 1106 @return An error code: 0 - success, otherwise - failure 1107 1108 @ref Backend 1109 **/ 1110 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, 1111 bool *is_update_needed) { 1112 *is_update_needed = !data->reuse_data || data->needs_data_update; 1113 return CEED_ERROR_SUCCESS; 1114 } 1115 1116 /** 1117 @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should 1118 be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`; 1119 Note: If `*data_copy` is non-NULL, then it is assumed that 1120 `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This 1121 CeedQFunctionAssemblyData will be destroyed if `*data_copy` is 1122 the only reference to this CeedQFunctionAssemblyData. 

  @param data           CeedQFunctionAssemblyData to copy reference to
  @param[out] data_copy Variable to store copied reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
    CeedQFunctionAssemblyData *data_copy) {
  int ierr;

  // Reference first so data survives even if *data_copy aliases it
  ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
  ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
  *data_copy = data;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get setup status for internal objects for CeedQFunctionAssemblyData

  @param[in]  data     CeedQFunctionAssemblyData to retrieve status
  @param[out] is_setup Boolean flag for setup status

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
                                     bool *is_setup) {
  *is_setup = data->is_setup;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set internal objects for CeedQFunctionAssemblyData

  @param[in] data CeedQFunctionAssemblyData to set objects
  @param[in] vec  CeedVector to store assembled CeedQFunction at quadrature points
  @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector vec, CeedElemRestriction rstr) {
  int ierr;

  // Hold references to the assembled objects inside the data holder
  ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);

  data->is_setup = true;
  return CEED_ERROR_SUCCESS;
}

// Return copied references to the stored assembled vector and restriction;
// caller owns (and must destroy) the returned references
int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector *vec, CeedElemRestriction *rstr) {
  int ierr;

  if (!data->is_setup)
    // LCOV_EXCL_START
    return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
                     "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
  // LCOV_EXCL_STOP

  // Copied references; caller is responsible for destroying them
  ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Destroy CeedQFunctionAssemblyData

  @param[out] data CeedQFunctionAssemblyData to destroy

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
  int ierr;

  // No-op for NULL or while other references remain
  if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;

  ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
  ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);

  ierr = CeedFree(data); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
1235 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1236 'v', provided in that order, would result in an assembled QFunction that 1237 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1238 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1239 1240 Note: Calling this function asserts that setup is complete 1241 and sets the CeedOperator as immutable. 1242 1243 @param op CeedOperator to assemble CeedQFunction 1244 @param[out] assembled CeedVector to store assembled CeedQFunction at 1245 quadrature points 1246 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1247 CeedQFunction 1248 @param request Address of CeedRequest for non-blocking completion, else 1249 @ref CEED_REQUEST_IMMEDIATE 1250 1251 @return An error code: 0 - success, otherwise - failure 1252 1253 @ref User 1254 **/ 1255 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1256 CeedElemRestriction *rstr, 1257 CeedRequest *request) { 1258 int ierr; 1259 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1260 1261 // Backend version 1262 if (op->LinearAssembleQFunction) { 1263 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1264 CeedChk(ierr); 1265 } else { 1266 // Fallback to reference Ceed 1267 if (!op->op_fallback) { 1268 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1269 } 1270 // Assemble 1271 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1272 rstr, request); CeedChk(ierr); 1273 } 1274 return CEED_ERROR_SUCCESS; 1275 } 1276 1277 /** 1278 @brief Assemble CeedQFunction and store result internall. Return copied 1279 references of stored data to the caller. Caller is responsible for 1280 ownership and destruction of the copied references. 
           See also
           @ref CeedOperatorLinearAssembleQFunction

  @param op        CeedOperator to assemble CeedQFunction
  @param assembled CeedVector to store assembled CeedQFunction at
                     quadrature points
  @param rstr      CeedElemRestriction for CeedVector containing assembled
                     CeedQFunction
  @param request   Address of CeedRequest for non-blocking completion, else
                     @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    bool qf_assembled_is_setup;
    CeedVector assembled_vec = NULL;
    CeedElemRestriction assembled_rstr = NULL;

    ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
                                            &qf_assembled_is_setup); CeedChk(ierr);
    if (qf_assembled_is_setup) {
      // Re-use the stored objects, updating their values only when stale
      ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
             &assembled_rstr); CeedChk(ierr);

      bool update_needed;
      ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
             &update_needed); CeedChk(ierr);
      if (update_needed) {
        ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
               request); CeedChk(ierr);
      }
    } else {
      // First assembly; store the freshly created objects for later re-use
      ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
                                         request); CeedChk(ierr);
      ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
             assembled_rstr); CeedChk(ierr);
    }
    ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
    CeedChk(ierr);

    // Copy reference to internally held copy; drop the local references
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
    ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
    ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Zero the output, then use the backend's additive version
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1416 1417 @param op CeedOperator to assemble CeedQFunction 1418 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1419 @param request Address of CeedRequest for non-blocking completion, else 1420 @ref CEED_REQUEST_IMMEDIATE 1421 1422 @return An error code: 0 - success, otherwise - failure 1423 1424 @ref User 1425 **/ 1426 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1427 CeedRequest *request) { 1428 int ierr; 1429 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1430 1431 // Use backend version, if available 1432 if (op->LinearAssembleAddDiagonal) { 1433 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1434 return CEED_ERROR_SUCCESS; 1435 } else { 1436 // Check for valid fallback resource 1437 const char *resource, *fallback_resource; 1438 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1439 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1440 CeedChk(ierr); 1441 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1442 // Fallback to reference Ceed 1443 if (!op->op_fallback) { 1444 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1445 } 1446 // Assemble 1447 ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, 1448 request); CeedChk(ierr); 1449 return CEED_ERROR_SUCCESS; 1450 } 1451 } 1452 1453 // Default interface implementation 1454 bool is_composite; 1455 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1456 if (is_composite) { 1457 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1458 false, assembled); CeedChk(ierr); 1459 return CEED_ERROR_SUCCESS; 1460 } else { 1461 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1462 CeedChk(ierr); 1463 return CEED_ERROR_SUCCESS; 1464 } 1465 } 1466 1467 /** 1468 @brief Assemble the point block diagonal of a square linear CeedOperator 1469 1470 This overwrites a CeedVector with the point block 
 diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
  @param request        Address of CeedRequest for non-blocking completion, else
                          CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Zero the output, then use the additive version
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
1552 @param request Address of CeedRequest for non-blocking completion, else 1553 CEED_REQUEST_IMMEDIATE 1554 1555 @return An error code: 0 - success, otherwise - failure 1556 1557 @ref User 1558 **/ 1559 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, 1560 CeedVector assembled, CeedRequest *request) { 1561 int ierr; 1562 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1563 1564 // Use backend version, if available 1565 if (op->LinearAssembleAddPointBlockDiagonal) { 1566 ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); 1567 CeedChk(ierr); 1568 return CEED_ERROR_SUCCESS; 1569 } else { 1570 // Check for valid fallback resource 1571 const char *resource, *fallback_resource; 1572 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1573 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1574 CeedChk(ierr); 1575 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1576 // Fallback to reference Ceed 1577 if (!op->op_fallback) { 1578 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1579 } 1580 // Assemble 1581 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1582 assembled, request); CeedChk(ierr); 1583 return CEED_ERROR_SUCCESS; 1584 } 1585 } 1586 1587 // Default interface implemenation 1588 bool is_composite; 1589 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1590 if (is_composite) { 1591 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1592 true, assembled); CeedChk(ierr); 1593 return CEED_ERROR_SUCCESS; 1594 } else { 1595 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1596 CeedChk(ierr); 1597 return CEED_ERROR_SUCCESS; 1598 } 1599 } 1600 1601 /** 1602 @brief Fully assemble the nonzero pattern of a linear operator. 

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in]  op          CeedOperator to assemble
  @param[out] num_entries Number of entries in coordinate nonzero pattern
  @param[out] rows        Row number for each entry
  @param[out] cols        Column number for each entry

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations; sub-operator blocks are laid out consecutively
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j).
  Note that the (i, j) pairs are not unique and may repeat.
  This function returns the values of the nonzero entries to be added, their
  (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op      CeedOperator to assemble
  @param[out] values Values to assemble into matrix

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // Each suboperator writes its values after those of its predecessors;
  // the ordering matches CeedOperatorLinearAssembleSymbolic()
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
                                                    &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator, creating the prolongation basis from the
           fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // For tensor bases the coarse-to-fine map is built from the 1D
  // interpolations; for non-tensor bases, from the full interpolations
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // tau holds the Householder scalars from the QR factorization.
  // NOTE(review): sized Q, which assumes Q >= P_f (an over-determined or
  // square interpolation) — confirm against CeedQRFactorization's contract
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  // Work on copies; the QR factorization overwrites its input
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Least-squares solve interp_f * interp_c_to_f = interp_c via QR:
  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  // Back substitution on the upper-triangular R stored in interp_f
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1886 1887 @param[in] op_fine Fine grid operator 1888 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1889 @param[in] rstr_coarse Coarse grid restriction 1890 @param[in] basis_coarse Coarse grid active vector basis 1891 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1892 @param[out] op_coarse Coarse grid operator 1893 @param[out] op_prolong Coarse to fine operator 1894 @param[out] op_restrict Fine to coarse operator 1895 1896 @return An error code: 0 - success, otherwise - failure 1897 1898 @ref User 1899 **/ 1900 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1901 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1902 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1903 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1904 int ierr; 1905 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1906 Ceed ceed; 1907 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1908 1909 // Check for compatible quadrature spaces 1910 CeedBasis basis_fine; 1911 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1912 CeedInt Q_f, Q_c; 1913 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1914 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1915 if (Q_f != Q_c) 1916 // LCOV_EXCL_START 1917 return CeedError(ceed, CEED_ERROR_DIMENSION, 1918 "Bases must have compatible quadrature spaces"); 1919 // LCOV_EXCL_STOP 1920 1921 // Coarse to fine basis 1922 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1923 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1924 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1925 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1926 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1927 CeedChk(ierr); 1928 P_1d_c = dim == 1 ? num_nodes_c : 1929 dim == 2 ? 
sqrt(num_nodes_c) : 1930 cbrt(num_nodes_c); 1931 CeedScalar *q_ref, *q_weight, *grad; 1932 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1933 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1934 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1935 CeedBasis basis_c_to_f; 1936 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1937 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1938 CeedChk(ierr); 1939 ierr = CeedFree(&q_ref); CeedChk(ierr); 1940 ierr = CeedFree(&q_weight); CeedChk(ierr); 1941 ierr = CeedFree(&grad); CeedChk(ierr); 1942 1943 // Core code 1944 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1945 basis_coarse, basis_c_to_f, op_coarse, 1946 op_prolong, op_restrict); 1947 CeedChk(ierr); 1948 return CEED_ERROR_SUCCESS; 1949 } 1950 1951 /** 1952 @brief Create a multigrid coarse operator and level transfer operators 1953 for a CeedOperator with a non-tensor basis for the active vector 1954 1955 Note: Calling this function asserts that setup is complete 1956 and sets the CeedOperator as immutable. 
1957 1958 @param[in] op_fine Fine grid operator 1959 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1960 @param[in] rstr_coarse Coarse grid restriction 1961 @param[in] basis_coarse Coarse grid active vector basis 1962 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1963 @param[out] op_coarse Coarse grid operator 1964 @param[out] op_prolong Coarse to fine operator 1965 @param[out] op_restrict Fine to coarse operator 1966 1967 @return An error code: 0 - success, otherwise - failure 1968 1969 @ref User 1970 **/ 1971 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1972 CeedVector p_mult_fine, 1973 CeedElemRestriction rstr_coarse, 1974 CeedBasis basis_coarse, 1975 const CeedScalar *interp_c_to_f, 1976 CeedOperator *op_coarse, 1977 CeedOperator *op_prolong, 1978 CeedOperator *op_restrict) { 1979 int ierr; 1980 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1981 Ceed ceed; 1982 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1983 1984 // Check for compatible quadrature spaces 1985 CeedBasis basis_fine; 1986 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1987 CeedInt Q_f, Q_c; 1988 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1989 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1990 if (Q_f != Q_c) 1991 // LCOV_EXCL_START 1992 return CeedError(ceed, CEED_ERROR_DIMENSION, 1993 "Bases must have compatible quadrature spaces"); 1994 // LCOV_EXCL_STOP 1995 1996 // Coarse to fine basis 1997 CeedElemTopology topo; 1998 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1999 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2000 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2001 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2002 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 2003 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2004 
CeedChk(ierr); 2005 CeedScalar *q_ref, *q_weight, *grad; 2006 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 2007 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 2008 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 2009 CeedBasis basis_c_to_f; 2010 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 2011 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2012 CeedChk(ierr); 2013 ierr = CeedFree(&q_ref); CeedChk(ierr); 2014 ierr = CeedFree(&q_weight); CeedChk(ierr); 2015 ierr = CeedFree(&grad); CeedChk(ierr); 2016 2017 // Core code 2018 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2019 basis_coarse, basis_c_to_f, op_coarse, 2020 op_prolong, op_restrict); 2021 CeedChk(ierr); 2022 return CEED_ERROR_SUCCESS; 2023 } 2024 2025 /** 2026 @brief Build a FDM based approximate inverse for each element for a 2027 CeedOperator 2028 2029 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2030 Method based approximate inverse. This function obtains the simultaneous 2031 diagonalization for the 1D mass and Laplacian operators, 2032 M = V^T V, K = V^T S V. 2033 The assembled QFunction is used to modify the eigenvalues from simultaneous 2034 diagonalization and obtain an approximate inverse of the form 2035 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2036 associated CeedQFunction must therefore also be linear. 2037 2038 Note: Calling this function asserts that setup is complete 2039 and sets the CeedOperator as immutable. 
2040 2041 @param op CeedOperator to create element inverses 2042 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2043 for each element 2044 @param request Address of CeedRequest for non-blocking completion, else 2045 @ref CEED_REQUEST_IMMEDIATE 2046 2047 @return An error code: 0 - success, otherwise - failure 2048 2049 @ref User 2050 **/ 2051 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2052 CeedRequest *request) { 2053 int ierr; 2054 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2055 2056 // Use backend version, if available 2057 if (op->CreateFDMElementInverse) { 2058 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2059 return CEED_ERROR_SUCCESS; 2060 } else { 2061 // Check for valid fallback resource 2062 const char *resource, *fallback_resource; 2063 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2064 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2065 CeedChk(ierr); 2066 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2067 // Fallback to reference Ceed 2068 if (!op->op_fallback) { 2069 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2070 } 2071 // Assemble 2072 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2073 CeedChk(ierr); 2074 return CEED_ERROR_SUCCESS; 2075 } 2076 } 2077 2078 // Interface implementation 2079 Ceed ceed, ceed_parent; 2080 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2081 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2082 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2083 CeedQFunction qf; 2084 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2085 2086 // Determine active input basis 2087 bool interp = false, grad = false; 2088 CeedBasis basis = NULL; 2089 CeedElemRestriction rstr = NULL; 2090 CeedOperatorField *op_fields; 2091 CeedQFunctionField *qf_fields; 2092 CeedInt num_input_fields; 2093 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2094 CeedChk(ierr); 2095 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2096 for (CeedInt i=0; i<num_input_fields; i++) { 2097 CeedVector vec; 2098 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2099 if (vec == CEED_VECTOR_ACTIVE) { 2100 CeedEvalMode eval_mode; 2101 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2102 interp = interp || eval_mode == CEED_EVAL_INTERP; 2103 grad = grad || eval_mode == CEED_EVAL_GRAD; 2104 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2105 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2106 } 2107 } 2108 if (!basis) 2109 // LCOV_EXCL_START 2110 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2111 // LCOV_EXCL_STOP 2112 CeedSize l_size = 1; 2113 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2114 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2115 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2116 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2117 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2118 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2119 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2120 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2121 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2122 2123 // Build and diagonalize 1D Mass and Laplacian 2124 
bool tensor_basis; 2125 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2126 if (!tensor_basis) 2127 // LCOV_EXCL_START 2128 return CeedError(ceed, CEED_ERROR_BACKEND, 2129 "FDMElementInverse only supported for tensor " 2130 "bases"); 2131 // LCOV_EXCL_STOP 2132 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2133 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2134 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2135 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2136 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2137 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2138 // -- Build matrices 2139 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2140 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2141 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2142 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2143 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2144 mass, laplace); CeedChk(ierr); 2145 2146 // -- Diagonalize 2147 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2148 CeedChk(ierr); 2149 ierr = CeedFree(&mass); CeedChk(ierr); 2150 ierr = CeedFree(&laplace); CeedChk(ierr); 2151 for (CeedInt i=0; i<P_1d; i++) 2152 for (CeedInt j=0; j<P_1d; j++) 2153 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2154 ierr = CeedFree(&x); CeedChk(ierr); 2155 2156 // Assemble QFunction 2157 CeedVector assembled; 2158 CeedElemRestriction rstr_qf; 2159 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2160 &rstr_qf, request); CeedChk(ierr); 2161 CeedInt layout[3]; 2162 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2163 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2164 CeedScalar max_norm = 0; 2165 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2166 2167 // Calculate element averages 2168 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2169 CeedScalar 
*elem_avg; 2170 const CeedScalar *assembled_array, *q_weight_array; 2171 CeedVector q_weight; 2172 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2173 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2174 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2175 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2176 CeedChk(ierr); 2177 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2178 CeedChk(ierr); 2179 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2180 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2181 for (CeedInt e=0; e<num_elem; e++) { 2182 CeedInt count = 0; 2183 for (CeedInt q=0; q<num_qpts; q++) 2184 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2185 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2186 qf_value_bound) { 2187 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2188 q_weight_array[q]; 2189 count++; 2190 } 2191 if (count) { 2192 elem_avg[e] /= count; 2193 } else { 2194 elem_avg[e] = 1.0; 2195 } 2196 } 2197 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2198 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2199 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2200 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2201 2202 // Build FDM diagonal 2203 CeedVector q_data; 2204 CeedScalar *q_data_array, *fdm_diagonal; 2205 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2206 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2207 for (CeedInt c=0; c<num_comp; c++) 2208 for (CeedInt n=0; n<elem_size; n++) { 2209 if (interp) 2210 fdm_diagonal[c*elem_size + n] = 1.0; 2211 if (grad) 2212 for (CeedInt d=0; d<dim; d++) { 2213 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2214 fdm_diagonal[c*elem_size + n] += lambda[i]; 2215 } 2216 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2217 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2218 } 2219 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2220 CeedChk(ierr); 2221 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2222 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2223 CeedChk(ierr); 2224 for (CeedInt e=0; e<num_elem; e++) 2225 for (CeedInt c=0; c<num_comp; c++) 2226 for (CeedInt n=0; n<elem_size; n++) 2227 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2228 fdm_diagonal[c*elem_size + n]); 2229 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2230 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2231 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2232 2233 // Setup FDM operator 2234 // -- Basis 2235 CeedBasis fdm_basis; 2236 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2237 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2238 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2239 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2240 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2241 fdm_interp, grad_dummy, q_ref_dummy, 2242 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2243 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2244 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2245 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2246 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2247 ierr = CeedFree(&lambda); CeedChk(ierr); 2248 2249 // -- Restriction 2250 CeedElemRestriction rstr_qd_i; 2251 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2252 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2253 num_comp, num_elem*num_comp*elem_size, 2254 strides, &rstr_qd_i); CeedChk(ierr); 2255 // -- QFunction 2256 CeedQFunction qf_fdm; 2257 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2258 CeedChk(ierr); 2259 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2260 
CeedChk(ierr); 2261 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2262 CeedChk(ierr); 2263 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2264 CeedChk(ierr); 2265 // -- QFunction context 2266 CeedInt *num_comp_data; 2267 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2268 num_comp_data[0] = num_comp; 2269 CeedQFunctionContext ctx_fdm; 2270 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2271 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2272 sizeof(*num_comp_data), num_comp_data); 2273 CeedChk(ierr); 2274 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2275 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2276 // -- Operator 2277 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2278 CeedChk(ierr); 2279 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2280 CeedChk(ierr); 2281 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2282 q_data); CeedChk(ierr); 2283 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2284 CeedChk(ierr); 2285 2286 // Cleanup 2287 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2288 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2289 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2290 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2291 2292 return CEED_ERROR_SUCCESS; 2293 } 2294 2295 /// @} 2296