1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedOperator preconditioning interfaces 18 19 /// ---------------------------------------------------------------------------- 20 /// CeedOperator Library Internal Preconditioning Functions 21 /// ---------------------------------------------------------------------------- 22 /// @addtogroup CeedOperatorDeveloper 23 /// @{ 24 25 /** 26 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 27 CeedOperator functionality 28 29 @param op CeedOperator to create fallback for 30 31 @return An error code: 0 - success, otherwise - failure 32 33 @ref Developer 34 **/ 35 int CeedOperatorCreateFallback(CeedOperator op) { 36 int ierr; 37 38 // Fallback Ceed 39 const char *resource, *fallback_resource; 40 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 41 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 42 CeedChk(ierr); 43 if (!strcmp(resource, fallback_resource)) 44 // LCOV_EXCL_START 45 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 46 "Backend %s cannot create an operator" 47 "fallback to resource %s", resource, fallback_resource); 48 // LCOV_EXCL_STOP 49 50 // Fallback Ceed 51 Ceed ceed_ref; 52 if (!op->ceed->op_fallback_ceed) { 53 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 54 ceed_ref->op_fallback_parent = op->ceed; 55 ceed_ref->Error = op->ceed->Error; 56 op->ceed->op_fallback_ceed = ceed_ref; 57 } 58 ceed_ref = op->ceed->op_fallback_ceed; 59 60 // Clone Op 61 CeedOperator op_ref; 62 ierr = 
CeedCalloc(1, &op_ref); CeedChk(ierr); 63 memcpy(op_ref, op, sizeof(*op_ref)); 64 op_ref->data = NULL; 65 op_ref->is_interface_setup = false; 66 op_ref->is_backend_setup = false; 67 op_ref->ceed = ceed_ref; 68 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 69 ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 70 &op_ref->qf_assembled); CeedChk(ierr); 71 op->op_fallback = op_ref; 72 73 // Clone QF 74 CeedQFunction qf_ref; 75 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 76 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 77 qf_ref->data = NULL; 78 qf_ref->ceed = ceed_ref; 79 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 80 op_ref->qf = qf_ref; 81 op->qf_fallback = qf_ref; 82 return CEED_ERROR_SUCCESS; 83 } 84 85 /** 86 @brief Select correct basis matrix pointer based on CeedEvalMode 87 88 @param[in] eval_mode Current basis evaluation mode 89 @param[in] identity Pointer to identity matrix 90 @param[in] interp Pointer to interpolation matrix 91 @param[in] grad Pointer to gradient matrix 92 @param[out] basis_ptr Basis pointer to set 93 94 @ref Developer 95 **/ 96 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 97 const CeedScalar *identity, const CeedScalar *interp, 98 const CeedScalar *grad, const CeedScalar **basis_ptr) { 99 switch (eval_mode) { 100 case CEED_EVAL_NONE: 101 *basis_ptr = identity; 102 break; 103 case CEED_EVAL_INTERP: 104 *basis_ptr = interp; 105 break; 106 case CEED_EVAL_GRAD: 107 *basis_ptr = grad; 108 break; 109 case CEED_EVAL_WEIGHT: 110 case CEED_EVAL_DIV: 111 case CEED_EVAL_CURL: 112 break; // Caught by QF Assembly 113 } 114 } 115 116 /** 117 @brief Create point block restriction for active operator field 118 119 @param[in] rstr Original CeedElemRestriction for active field 120 @param[out] pointblock_rstr Address of the variable where the newly created 121 CeedElemRestriction will be stored 122 123 @return An error code: 0 - success, otherwise - failure 124 125 @ref Developer 126 **/ 127 static 
int CeedOperatorCreateActivePointBlockRestriction( 128 CeedElemRestriction rstr, 129 CeedElemRestriction *pointblock_rstr) { 130 int ierr; 131 Ceed ceed; 132 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 133 const CeedInt *offsets; 134 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 135 CeedChk(ierr); 136 137 // Expand offsets 138 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 139 *pointblock_offsets; 140 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 141 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 142 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 143 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 144 CeedInt shift = num_comp; 145 if (comp_stride != 1) 146 shift *= num_comp; 147 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 148 CeedChk(ierr); 149 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 150 pointblock_offsets[i] = offsets[i]*shift; 151 if (pointblock_offsets[i] > max) 152 max = pointblock_offsets[i]; 153 } 154 155 // Create new restriction 156 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 157 1, max + num_comp*num_comp, CEED_MEM_HOST, 158 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 159 CeedChk(ierr); 160 161 // Cleanup 162 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 163 164 return CEED_ERROR_SUCCESS; 165 } 166 167 /** 168 @brief Core logic for assembling operator diagonal or point block diagonal 169 170 @param[in] op CeedOperator to assemble point block diagonal 171 @param[in] request Address of CeedRequest for non-blocking completion, else 172 CEED_REQUEST_IMMEDIATE 173 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 174 @param[out] assembled CeedVector to store assembled diagonal 175 176 @return An error code: 0 - success, otherwise - failure 177 178 @ref Developer 
179 **/ 180 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 181 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 182 int ierr; 183 Ceed ceed; 184 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 185 186 // Assemble QFunction 187 CeedQFunction qf; 188 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 189 CeedInt num_input_fields, num_output_fields; 190 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 191 CeedChk(ierr); 192 CeedVector assembled_qf; 193 CeedElemRestriction rstr; 194 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 195 &rstr, request); CeedChk(ierr); 196 CeedInt layout[3]; 197 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 198 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 199 CeedScalar max_norm = 0; 200 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 201 202 // Determine active input basis 203 CeedOperatorField *op_fields; 204 CeedQFunctionField *qf_fields; 205 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 206 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 207 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 208 CeedEvalMode *eval_mode_in = NULL; 209 CeedBasis basis_in = NULL; 210 CeedElemRestriction rstr_in = NULL; 211 for (CeedInt i=0; i<num_input_fields; i++) { 212 CeedVector vec; 213 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 214 if (vec == CEED_VECTOR_ACTIVE) { 215 CeedElemRestriction rstr; 216 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 217 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 218 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 219 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 220 if (rstr_in && rstr_in != rstr) 221 // LCOV_EXCL_START 222 return CeedError(ceed, 
CEED_ERROR_BACKEND, 223 "Multi-field non-composite operator diagonal assembly not supported"); 224 // LCOV_EXCL_STOP 225 rstr_in = rstr; 226 CeedEvalMode eval_mode; 227 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 228 switch (eval_mode) { 229 case CEED_EVAL_NONE: 230 case CEED_EVAL_INTERP: 231 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 232 eval_mode_in[num_eval_mode_in] = eval_mode; 233 num_eval_mode_in += 1; 234 break; 235 case CEED_EVAL_GRAD: 236 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 237 for (CeedInt d=0; d<dim; d++) 238 eval_mode_in[num_eval_mode_in+d] = eval_mode; 239 num_eval_mode_in += dim; 240 break; 241 case CEED_EVAL_WEIGHT: 242 case CEED_EVAL_DIV: 243 case CEED_EVAL_CURL: 244 break; // Caught by QF Assembly 245 } 246 } 247 } 248 249 // Determine active output basis 250 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 251 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 252 CeedInt num_eval_mode_out = 0; 253 CeedEvalMode *eval_mode_out = NULL; 254 CeedBasis basis_out = NULL; 255 CeedElemRestriction rstr_out = NULL; 256 for (CeedInt i=0; i<num_output_fields; i++) { 257 CeedVector vec; 258 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 259 if (vec == CEED_VECTOR_ACTIVE) { 260 CeedElemRestriction rstr; 261 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 262 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 263 if (rstr_out && rstr_out != rstr) 264 // LCOV_EXCL_START 265 return CeedError(ceed, CEED_ERROR_BACKEND, 266 "Multi-field non-composite operator diagonal assembly not supported"); 267 // LCOV_EXCL_STOP 268 rstr_out = rstr; 269 CeedEvalMode eval_mode; 270 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 271 switch (eval_mode) { 272 case CEED_EVAL_NONE: 273 case CEED_EVAL_INTERP: 274 ierr = 
CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 275 eval_mode_out[num_eval_mode_out] = eval_mode; 276 num_eval_mode_out += 1; 277 break; 278 case CEED_EVAL_GRAD: 279 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 280 for (CeedInt d=0; d<dim; d++) 281 eval_mode_out[num_eval_mode_out+d] = eval_mode; 282 num_eval_mode_out += dim; 283 break; 284 case CEED_EVAL_WEIGHT: 285 case CEED_EVAL_DIV: 286 case CEED_EVAL_CURL: 287 break; // Caught by QF Assembly 288 } 289 } 290 } 291 292 // Assemble point block diagonal restriction, if needed 293 CeedElemRestriction diag_rstr = rstr_out; 294 if (is_pointblock) { 295 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 296 CeedChk(ierr); 297 } 298 299 // Create diagonal vector 300 CeedVector elem_diag; 301 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 302 CeedChk(ierr); 303 304 // Assemble element operator diagonals 305 CeedScalar *elem_diag_array; 306 const CeedScalar *assembled_qf_array; 307 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 308 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 309 CeedChk(ierr); 310 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 311 CeedChk(ierr); 312 CeedInt num_elem, num_nodes, num_qpts; 313 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 314 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 315 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 316 // Basis matrices 317 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 318 CeedScalar *identity = NULL; 319 bool evalNone = false; 320 for (CeedInt i=0; i<num_eval_mode_in; i++) 321 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 322 for (CeedInt i=0; i<num_eval_mode_out; i++) 323 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 324 if (evalNone) { 325 ierr = 
CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 326 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 327 identity[i*num_nodes+i] = 1.0; 328 } 329 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 330 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 331 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 332 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 333 // Compute the diagonal of B^T D B 334 // Each element 335 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 336 for (CeedInt e=0; e<num_elem; e++) { 337 CeedInt d_out = -1; 338 // Each basis eval mode pair 339 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 340 const CeedScalar *bt = NULL; 341 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 342 d_out += 1; 343 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 344 &grad_out[d_out*num_qpts*num_nodes], &bt); 345 CeedInt d_in = -1; 346 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 347 const CeedScalar *b = NULL; 348 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 349 d_in += 1; 350 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 351 &grad_in[d_in*num_qpts*num_nodes], &b); 352 // Each component 353 for (CeedInt c_out=0; c_out<num_comp; c_out++) 354 // Each qpoint/node pair 355 for (CeedInt q=0; q<num_qpts; q++) 356 if (is_pointblock) { 357 // Point Block Diagonal 358 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 359 const CeedScalar qf_value = 360 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 361 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 362 if (fabs(qf_value) > qf_value_bound) 363 for (CeedInt n=0; n<num_nodes; n++) 364 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 365 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 366 } 367 } else { 368 // Diagonal Only 369 const CeedScalar qf_value = 370 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 371 
num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 372 if (fabs(qf_value) > qf_value_bound) 373 for (CeedInt n=0; n<num_nodes; n++) 374 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 375 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 376 } 377 } 378 } 379 } 380 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 381 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 382 CeedChk(ierr); 383 384 // Assemble local operator diagonal 385 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 386 assembled, request); CeedChk(ierr); 387 388 // Cleanup 389 if (is_pointblock) { 390 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 391 } 392 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 393 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 394 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 395 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 396 ierr = CeedFree(&identity); CeedChk(ierr); 397 398 return CEED_ERROR_SUCCESS; 399 } 400 401 /** 402 @brief Core logic for assembling composite operator diagonal 403 404 @param[in] op CeedOperator to assemble point block diagonal 405 @param[in] request Address of CeedRequest for non-blocking completion, else 406 CEED_REQUEST_IMMEDIATE 407 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 408 @param[out] assembled CeedVector to store assembled diagonal 409 410 @return An error code: 0 - success, otherwise - failure 411 412 @ref Developer 413 **/ 414 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 415 CeedOperator op, CeedRequest *request, const bool is_pointblock, 416 CeedVector assembled) { 417 int ierr; 418 CeedInt num_sub; 419 CeedOperator *suboperators; 420 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 421 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 422 for (CeedInt i = 0; i < num_sub; i++) { 423 ierr = 
CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 424 is_pointblock, assembled); CeedChk(ierr); 425 } 426 return CEED_ERROR_SUCCESS; 427 } 428 429 /** 430 @brief Build nonzero pattern for non-composite operator 431 432 Users should generally use CeedOperatorLinearAssembleSymbolic() 433 434 @param[in] op CeedOperator to assemble nonzero pattern 435 @param[in] offset Offset for number of entries 436 @param[out] rows Row number for each entry 437 @param[out] cols Column number for each entry 438 439 @return An error code: 0 - success, otherwise - failure 440 441 @ref Developer 442 **/ 443 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 444 CeedInt *rows, CeedInt *cols) { 445 int ierr; 446 Ceed ceed = op->ceed; 447 if (op->is_composite) 448 // LCOV_EXCL_START 449 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 450 "Composite operator not supported"); 451 // LCOV_EXCL_STOP 452 453 CeedSize num_nodes; 454 ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr); 455 CeedElemRestriction rstr_in; 456 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 457 CeedInt num_elem, elem_size, num_comp; 458 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 459 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 460 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 461 CeedInt layout_er[3]; 462 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 463 464 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 465 466 // Determine elem_dof relation 467 CeedVector index_vec; 468 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 469 CeedScalar *array; 470 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 471 for (CeedInt i = 0; i < num_nodes; ++i) { 472 array[i] = i; 473 } 474 ierr = CeedVectorRestoreArray(index_vec, &array); 
CeedChk(ierr); 475 CeedVector elem_dof; 476 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 477 CeedChk(ierr); 478 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 479 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 480 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 481 const CeedScalar *elem_dof_a; 482 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 483 CeedChk(ierr); 484 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 485 486 // Determine i, j locations for element matrices 487 CeedInt count = 0; 488 for (int e = 0; e < num_elem; ++e) { 489 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 490 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 491 for (int i = 0; i < elem_size; ++i) { 492 for (int j = 0; j < elem_size; ++j) { 493 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 494 (comp_out)*layout_er[1] + e*layout_er[2]; 495 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 496 (comp_in)*layout_er[1] + e*layout_er[2]; 497 498 const CeedInt row = elem_dof_a[elem_dof_index_row]; 499 const CeedInt col = elem_dof_a[elem_dof_index_col]; 500 501 rows[offset + count] = row; 502 cols[offset + count] = col; 503 count++; 504 } 505 } 506 } 507 } 508 } 509 if (count != local_num_entries) 510 // LCOV_EXCL_START 511 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 512 // LCOV_EXCL_STOP 513 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 514 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 515 516 return CEED_ERROR_SUCCESS; 517 } 518 519 /** 520 @brief Assemble nonzero entries for non-composite operator 521 522 Users should generally use CeedOperatorLinearAssemble() 523 524 @param[in] op CeedOperator to assemble 525 @param[out] offset Offest for number of entries 526 @param[out] values Values to assemble into matrix 527 528 @return An error code: 0 - success, otherwise - failure 529 530 @ref Developer 531 **/ 532 static 
int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
                               CeedVector values) {
  int ierr;
  Ceed ceed = op->ceed;
  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP

  // Assemble QFunction
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr_q;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
           op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);

  CeedSize qf_length;
  ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);

  CeedInt num_input_fields, num_output_fields;
  CeedOperatorField *input_fields;
  CeedOperatorField *output_fields;
  ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
                               &num_output_fields, &output_fields); CeedChk(ierr);

  // Determine active input basis
  CeedQFunctionField *qf_fields;
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
      CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      // GRAD contributes one eval mode entry per dimension
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        }
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
      CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        }
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Cannot assemble operator with out inputs/outputs");
  // LCOV_EXCL_STOP

  CeedInt num_elem, elem_size, num_qpts, num_comp;
  ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);

  CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  // loop over elements and put in data structure
  // NOTE(review): only the *input* basis matrices are fetched; the output
  //   B matrix below reuses interp_in/grad_in, which assumes the input and
  //   output fields share the same basis - TODO confirm
  const CeedScalar *interp_in, *grad_in;
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);

  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);

  CeedInt layout_qf[3];
  ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);

  // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
  // NOTE(review): these are VLAs sized by runtime basis/restriction sizes;
  //   large elements could risk stack overflow - TODO confirm size bounds
  CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
  CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
  CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
                                       num_qpts]; // logically 3-tensor
  CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
  CeedScalar elem_mat[elem_size * elem_size];
  int count = 0;
  CeedScalar *vals;
  ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
  for (int e = 0; e < num_elem; ++e) {
    for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
      for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
        // Zero the scratch matrices for this component pair
        for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
          B_mat_in[ell] = 0.0;
        }
        for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
          B_mat_out[ell] = 0.0;
        }
        // Store block-diagonal D matrix as collection of small dense blocks
        for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
          D_mat[ell] = 0.0;
        }
        // form element matrix itself (for each block component)
        for (int ell = 0; ell < elem_size*elem_size; ++ell) {
          elem_mat[ell] = 0.0;
        }
        // Fill B matrices (qpt-major rows of basis values) and the D blocks
        //   from the assembled QFunction data
        for (int q = 0; q < num_qpts; ++q) {
          for (int n = 0; n < elem_size; ++n) {
            CeedInt d_in = -1;
            for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
              const int qq = num_eval_mode_in*q;
              if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
                B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
                d_in += 1;
                B_mat_in[(qq+e_in)*elem_size + n] +=
                  grad_in[(d_in*num_qpts+q) * elem_size + n];
              } else {
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
            CeedInt d_out = -1;
            for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
              const int qq = num_eval_mode_out*q;
              if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
                // NOTE(review): interp_in used for the output basis (see above)
                B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
                d_out += 1;
                B_mat_out[(qq+e_out)*elem_size + n] +=
                  grad_in[(d_out*num_qpts+q) * elem_size + n];
              } else {
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
          }
          for (int ei = 0; ei < num_eval_mode_out; ++ei) {
            for (int ej = 0; ej < num_eval_mode_in; ++ej) {
              const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
                                          +comp_out;
              const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
                                e*layout_qf[2];
              D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
            }
          }
        }
        // Compute B^T*D
        // NOTE(review): the shared index qq = num_eval_mode_out*q spans both the
        //   BTD column (ei over in-modes) and B_mat_out row (ej over out-modes),
        //   which is only consistent when the in/out eval mode counts match -
        //   TODO confirm
        for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
          BTD[ell] = 0.0;
        }
        for (int j = 0; j<elem_size; ++j) {
          for (int q = 0; q<num_qpts; ++q) {
            int qq = num_eval_mode_out*q;
            for (int ei = 0; ei < num_eval_mode_in; ++ei) {
              for (int ej = 0; ej < num_eval_mode_out; ++ej) {
                BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
                  B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
              }
            }
          }
        }

        // elem_mat = (B^T D) * B
        ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
                                  elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);

        // put element matrix in coordinate data structure
        for (int i = 0; i < elem_size; ++i) {
          for (int j = 0; j < elem_size; ++j) {
            vals[offset + count] = elem_mat[i*elem_size + j];
            count++;
          }
        }
      }
    }
  }
  if (count != local_num_entries)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
  // LCOV_EXCL_STOP
  ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);

  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Count number of entries for assembled CeedOperator

  @param[in]  op           CeedOperator to assemble
  @param[out] num_entries  Number of entries in assembled representation

  @return An error code: 0 - success, otherwise - failure

  @ref Utility
**/
static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
    CeedInt *num_entries) {
  int ierr;
  CeedElemRestriction rstr;
  CeedInt num_elem, elem_size, num_comp;

  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP
  ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  // One dense (elem_size*num_comp)^2 block per element
  *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Common code for creating a multigrid coarse operator and level
           transfer operators for a CeedOperator

  @param[in]  op_fine       Fine grid operator
  @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in]  rstr_coarse   Coarse grid restriction
  @param[in]  basis_coarse  Coarse grid active vector basis
  @param[in]  basis_c_to_f  Basis for coarse to fine interpolation
  @param[out] op_coarse     Coarse grid operator
  @param[out] op_prolong    Coarse to fine operator
  @param[out] op_restrict   Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid - same QFunction, coarse basis/restriction on active fields
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      // Remember the fine active restriction for the transfer operators below
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone QFunctionAssemblyData
  ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
         &(*op_coarse)->qf_assembled); CeedChk(ierr);

  // Multiplicity vector - reciprocal of node multiplicity, used to undo the
  //   duplicate contributions from shared nodes in the transfer operators
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction - fine-to-coarse transfer built from the "Scale" QFunction
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  // Context holds the component count; ownership transfers to the context
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation - coarse-to-fine transfer, mirror of the restriction
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup - note basis_c_to_f ownership is consumed here
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in]  interp_1d    Interpolation matrix in one dimension
  @param[in]  grad_1d      Gradient matrix in one dimension
  @param[in]  q_weight_1d  Quadrature weights in one dimension
  @param[in]  P_1d         Number of basis nodes in one dimension
  @param[in]  Q_1d         Number of quadrature points in one dimension
  @param[in]  dim          Dimension of basis
  @param[out] mass         Assembled mass matrix in one dimension
  @param[out] laplace      Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
987 const CeedScalar *q_weight_1d, CeedInt P_1d, 988 CeedInt Q_1d, CeedInt dim, 989 CeedScalar *mass, CeedScalar *laplace) { 990 991 for (CeedInt i=0; i<P_1d; i++) 992 for (CeedInt j=0; j<P_1d; j++) { 993 CeedScalar sum = 0.0; 994 for (CeedInt k=0; k<Q_1d; k++) 995 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 996 mass[i+j*P_1d] = sum; 997 } 998 // -- Laplacian 999 for (CeedInt i=0; i<P_1d; i++) 1000 for (CeedInt j=0; j<P_1d; j++) { 1001 CeedScalar sum = 0.0; 1002 for (CeedInt k=0; k<Q_1d; k++) 1003 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1004 laplace[i+j*P_1d] = sum; 1005 } 1006 CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4; 1007 for (CeedInt i=0; i<P_1d; i++) 1008 laplace[i+P_1d*i] += perturbation; 1009 return CEED_ERROR_SUCCESS; 1010 } 1011 CeedPragmaOptimizeOn 1012 1013 /// @} 1014 1015 /// ---------------------------------------------------------------------------- 1016 /// CeedOperator Backend API 1017 /// ---------------------------------------------------------------------------- 1018 /// @addtogroup CeedOperatorBackend 1019 /// @{ 1020 1021 /** 1022 @brief Create object holding CeedQFunction assembly data for CeedOperator 1023 1024 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1025 @param[out] data Address of the variable where the newly created 1026 CeedQFunctionAssemblyData will be stored 1027 1028 @return An error code: 0 - success, otherwise - failure 1029 1030 @ref Backend 1031 **/ 1032 int CeedQFunctionAssemblyDataCreate(Ceed ceed, 1033 CeedQFunctionAssemblyData *data) { 1034 int ierr; 1035 1036 ierr = CeedCalloc(1, data); CeedChk(ierr); 1037 (*data)->ref_count = 1; 1038 (*data)->ceed = ceed; 1039 ierr = CeedReference(ceed); CeedChk(ierr); 1040 1041 return CEED_ERROR_SUCCESS; 1042 } 1043 1044 /** 1045 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1046 1047 @param data CeedQFunctionAssemblyData to increment the reference counter 1048 1049 @return An error 
code: 0 - success, otherwise - failure 1050 1051 @ref Backend 1052 **/ 1053 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1054 data->ref_count++; 1055 return CEED_ERROR_SUCCESS; 1056 } 1057 1058 /** 1059 @brief Set re-use of CeedQFunctionAssemblyData 1060 1061 @param data CeedQFunctionAssemblyData to mark for reuse 1062 @param reuse_data Boolean flag indicating data re-use 1063 1064 @return An error code: 0 - success, otherwise - failure 1065 1066 @ref Backend 1067 **/ 1068 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, 1069 bool reuse_data) { 1070 data->reuse_data = reuse_data; 1071 data->needs_data_update = true; 1072 return CEED_ERROR_SUCCESS; 1073 } 1074 1075 /** 1076 @brief Mark QFunctionAssemblyData as stale 1077 1078 @param data CeedQFunctionAssemblyData to mark as stale 1079 @param needs_data_update Boolean flag indicating if update is needed or completed 1080 1081 @return An error code: 0 - success, otherwise - failure 1082 1083 @ref Backend 1084 **/ 1085 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, 1086 bool needs_data_update) { 1087 data->needs_data_update = needs_data_update; 1088 return CEED_ERROR_SUCCESS; 1089 } 1090 1091 /** 1092 @brief Determine if QFunctionAssemblyData needs update 1093 1094 @param[in] data CeedQFunctionAssemblyData to mark as stale 1095 @param[out] is_update_needed Boolean flag indicating if re-assembly is required 1096 1097 @return An error code: 0 - success, otherwise - failure 1098 1099 @ref Backend 1100 **/ 1101 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, 1102 bool *is_update_needed) { 1103 *is_update_needed = !data->reuse_data || data->needs_data_update; 1104 return CEED_ERROR_SUCCESS; 1105 } 1106 1107 /** 1108 @brief Copy the pointer to a CeedQFunctionAssemblyData. 
Both pointers should 1109 be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`; 1110 Note: If `*data_copy` is non-NULL, then it is assumed that 1111 `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This 1112 CeedQFunctionAssemblyData will be destroyed if `*data_copy` is 1113 the only reference to this CeedQFunctionAssemblyData. 1114 1115 @param data CeedQFunctionAssemblyData to copy reference to 1116 @param[out] data_copy Variable to store copied reference 1117 1118 @return An error code: 0 - success, otherwise - failure 1119 1120 @ref Backend 1121 **/ 1122 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, 1123 CeedQFunctionAssemblyData *data_copy) { 1124 int ierr; 1125 1126 ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr); 1127 ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr); 1128 *data_copy = data; 1129 return CEED_ERROR_SUCCESS; 1130 } 1131 1132 /** 1133 @brief Get setup status for internal objects for CeedQFunctionAssemblyData 1134 1135 @param[in] data CeedQFunctionAssemblyData to retreive status 1136 @param[out] is_setup Boolean flag for setup status 1137 1138 @return An error code: 0 - success, otherwise - failure 1139 1140 @ref Backend 1141 **/ 1142 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, 1143 bool *is_setup) { 1144 *is_setup = data->is_setup; 1145 return CEED_ERROR_SUCCESS; 1146 } 1147 1148 /** 1149 @brief Set internal objects for CeedQFunctionAssemblyData 1150 1151 @param[in] data CeedQFunctionAssemblyData to set objects 1152 @param[in] vec CeedVector to store assembled CeedQFunction at quadrature points 1153 @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction 1154 1155 @return An error code: 0 - success, otherwise - failure 1156 1157 @ref Backend 1158 **/ 1159 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, 1160 CeedVector vec, CeedElemRestriction rstr) { 1161 int ierr; 1162 1163 ierr = 
CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr); 1164 ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr); 1165 1166 data->is_setup = true; 1167 return CEED_ERROR_SUCCESS; 1168 } 1169 1170 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, 1171 CeedVector *vec, CeedElemRestriction *rstr) { 1172 int ierr; 1173 1174 if (!data->is_setup) 1175 // LCOV_EXCL_START 1176 return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, 1177 "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first."); 1178 // LCOV_EXCL_STOP 1179 1180 ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr); 1181 ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr); 1182 1183 return CEED_ERROR_SUCCESS; 1184 } 1185 1186 /** 1187 @brief Destroy CeedQFunctionAssemblyData 1188 1189 @param[out] data CeedQFunctionAssemblyData to destroy 1190 1191 @return An error code: 0 - success, otherwise - failure 1192 1193 @ref Backend 1194 **/ 1195 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { 1196 int ierr; 1197 1198 if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS; 1199 1200 ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr); 1201 ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr); 1202 ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr); 1203 1204 ierr = CeedFree(data); CeedChk(ierr); 1205 return CEED_ERROR_SUCCESS; 1206 } 1207 1208 /// @} 1209 1210 /// ---------------------------------------------------------------------------- 1211 /// CeedOperator Public API 1212 /// ---------------------------------------------------------------------------- 1213 /// @addtogroup CeedOperatorUser 1214 /// @{ 1215 1216 /** 1217 @brief Assemble a linear CeedQFunction associated with a CeedOperator 1218 1219 This returns a CeedVector containing a matrix at each quadrature point 1220 providing the action of the CeedQFunction associated with the CeedOperator. 
1221 The vector 'assembled' is of shape 1222 [num_elements, num_input_fields, num_output_fields, num_quad_points] 1223 and contains column-major matrices representing the action of the 1224 CeedQFunction for a corresponding quadrature point on an element. Inputs and 1225 outputs are in the order provided by the user when adding CeedOperator fields. 1226 For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 1227 'v', provided in that order, would result in an assembled QFunction that 1228 consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting 1229 on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v]. 1230 1231 Note: Calling this function asserts that setup is complete 1232 and sets the CeedOperator as immutable. 1233 1234 @param op CeedOperator to assemble CeedQFunction 1235 @param[out] assembled CeedVector to store assembled CeedQFunction at 1236 quadrature points 1237 @param[out] rstr CeedElemRestriction for CeedVector containing assembled 1238 CeedQFunction 1239 @param request Address of CeedRequest for non-blocking completion, else 1240 @ref CEED_REQUEST_IMMEDIATE 1241 1242 @return An error code: 0 - success, otherwise - failure 1243 1244 @ref User 1245 **/ 1246 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, 1247 CeedElemRestriction *rstr, 1248 CeedRequest *request) { 1249 int ierr; 1250 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1251 1252 // Backend version 1253 if (op->LinearAssembleQFunction) { 1254 ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); 1255 CeedChk(ierr); 1256 } else { 1257 // Fallback to reference Ceed 1258 if (!op->op_fallback) { 1259 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1260 } 1261 // Assemble 1262 ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, 1263 rstr, request); CeedChk(ierr); 1264 } 1265 return CEED_ERROR_SUCCESS; 1266 } 1267 1268 /** 1269 @brief Assemble CeedQFunction and store 
result internall. Return copied 1270 references of stored data to the caller. Caller is responsible for 1271 ownership and destruction of the copied references. See also 1272 @ref CeedOperatorLinearAssembleQFunction 1273 1274 @param op CeedOperator to assemble CeedQFunction 1275 @param assembled CeedVector to store assembled CeedQFunction at 1276 quadrature points 1277 @param rstr CeedElemRestriction for CeedVector containing assembled 1278 CeedQFunction 1279 @param request Address of CeedRequest for non-blocking completion, else 1280 @ref CEED_REQUEST_IMMEDIATE 1281 1282 @return An error code: 0 - success, otherwise - failure 1283 1284 @ref User 1285 **/ 1286 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, 1287 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 1288 int ierr; 1289 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1290 1291 // Backend version 1292 if (op->LinearAssembleQFunctionUpdate) { 1293 bool qf_assembled_is_setup; 1294 CeedVector assembled_vec = NULL; 1295 CeedElemRestriction assembled_rstr = NULL; 1296 1297 ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, 1298 &qf_assembled_is_setup); CeedChk(ierr); 1299 if (qf_assembled_is_setup) { 1300 ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, 1301 &assembled_rstr); CeedChk(ierr); 1302 1303 bool update_needed; 1304 ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, 1305 &update_needed); CeedChk(ierr); 1306 if (update_needed) { 1307 ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, 1308 request); CeedChk(ierr); 1309 } 1310 } else { 1311 ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, 1312 request); CeedChk(ierr); 1313 ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, 1314 assembled_rstr); CeedChk(ierr); 1315 } 1316 ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false); 1317 CeedChk(ierr); 1318 1319 // Copy 
reference to internally held copy 1320 *assembled = NULL; 1321 *rstr = NULL; 1322 ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr); 1323 ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr); 1324 ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr); 1325 ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr); 1326 } else { 1327 // Fallback to reference Ceed 1328 if (!op->op_fallback) { 1329 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1330 } 1331 // Assemble 1332 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback, 1333 assembled, rstr, request); CeedChk(ierr); 1334 } 1335 1336 return CEED_ERROR_SUCCESS; 1337 } 1338 1339 /** 1340 @brief Assemble the diagonal of a square linear CeedOperator 1341 1342 This overwrites a CeedVector with the diagonal of a linear CeedOperator. 1343 1344 Note: Currently only non-composite CeedOperators with a single field and 1345 composite CeedOperators with single field sub-operators are supported. 1346 1347 Note: Calling this function asserts that setup is complete 1348 and sets the CeedOperator as immutable. 
1349 1350 @param op CeedOperator to assemble CeedQFunction 1351 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1352 @param request Address of CeedRequest for non-blocking completion, else 1353 @ref CEED_REQUEST_IMMEDIATE 1354 1355 @return An error code: 0 - success, otherwise - failure 1356 1357 @ref User 1358 **/ 1359 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, 1360 CeedRequest *request) { 1361 int ierr; 1362 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1363 1364 CeedSize input_size = 0, output_size = 0; 1365 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1366 CeedChk(ierr); 1367 if (input_size != output_size) 1368 // LCOV_EXCL_START 1369 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1370 // LCOV_EXCL_STOP 1371 1372 // Use backend version, if available 1373 if (op->LinearAssembleDiagonal) { 1374 ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); 1375 return CEED_ERROR_SUCCESS; 1376 } else if (op->LinearAssembleAddDiagonal) { 1377 ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); 1378 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1379 return CEED_ERROR_SUCCESS; 1380 } else { 1381 // Check for valid fallback resource 1382 const char *resource, *fallback_resource; 1383 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1384 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1385 CeedChk(ierr); 1386 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1387 // Fallback to reference Ceed 1388 if (!op->op_fallback) { 1389 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1390 } 1391 // Assemble 1392 ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request); 1393 CeedChk(ierr); 1394 return CEED_ERROR_SUCCESS; 1395 } 1396 } 1397 1398 // Default interface implementation 1399 ierr = CeedVectorSetValue(assembled, 0.0); 
CeedChk(ierr); 1400 ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); 1401 CeedChk(ierr); 1402 return CEED_ERROR_SUCCESS; 1403 } 1404 1405 /** 1406 @brief Assemble the diagonal of a square linear CeedOperator 1407 1408 This sums into a CeedVector the diagonal of a linear CeedOperator. 1409 1410 Note: Currently only non-composite CeedOperators with a single field and 1411 composite CeedOperators with single field sub-operators are supported. 1412 1413 Note: Calling this function asserts that setup is complete 1414 and sets the CeedOperator as immutable. 1415 1416 @param op CeedOperator to assemble CeedQFunction 1417 @param[out] assembled CeedVector to store assembled CeedOperator diagonal 1418 @param request Address of CeedRequest for non-blocking completion, else 1419 @ref CEED_REQUEST_IMMEDIATE 1420 1421 @return An error code: 0 - success, otherwise - failure 1422 1423 @ref User 1424 **/ 1425 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, 1426 CeedRequest *request) { 1427 int ierr; 1428 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1429 1430 CeedSize input_size = 0, output_size = 0; 1431 ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); 1432 CeedChk(ierr); 1433 if (input_size != output_size) 1434 // LCOV_EXCL_START 1435 return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); 1436 // LCOV_EXCL_STOP 1437 1438 // Use backend version, if available 1439 if (op->LinearAssembleAddDiagonal) { 1440 ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); 1441 return CEED_ERROR_SUCCESS; 1442 } else { 1443 // Check for valid fallback resource 1444 const char *resource, *fallback_resource; 1445 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1446 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1447 CeedChk(ierr); 1448 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1449 // Fallback to reference 
Ceed 1450 if (!op->op_fallback) { 1451 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1452 } 1453 // Assemble 1454 ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, 1455 request); CeedChk(ierr); 1456 return CEED_ERROR_SUCCESS; 1457 } 1458 } 1459 1460 // Default interface implementation 1461 bool is_composite; 1462 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1463 if (is_composite) { 1464 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1465 false, assembled); CeedChk(ierr); 1466 return CEED_ERROR_SUCCESS; 1467 } else { 1468 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); 1469 CeedChk(ierr); 1470 return CEED_ERROR_SUCCESS; 1471 } 1472 } 1473 1474 /** 1475 @brief Assemble the point block diagonal of a square linear CeedOperator 1476 1477 This overwrites a CeedVector with the point block diagonal of a linear 1478 CeedOperator. 1479 1480 Note: Currently only non-composite CeedOperators with a single field and 1481 composite CeedOperators with single field sub-operators are supported. 1482 1483 Note: Calling this function asserts that setup is complete 1484 and sets the CeedOperator as immutable. 1485 1486 @param op CeedOperator to assemble CeedQFunction 1487 @param[out] assembled CeedVector to store assembled CeedOperator point block 1488 diagonal, provided in row-major form with an 1489 @a num_comp * @a num_comp block at each node. The dimensions 1490 of this vector are derived from the active vector 1491 for the CeedOperator. The array has shape 1492 [nodes, component out, component in]. 
  @param request   Address of CeedRequest for non-blocking completion, else
                     CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Point block diagonal assembly is only defined for square operators
  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Backend only provides the "Add" variant; zero the output then sum into it
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble via the fallback operator
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation: zero then sum into the output
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op         CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request    Address of CeedRequest for non-blocking completion, else
                      CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Point block diagonal assembly is only defined for square operators
  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble via the fallback operator
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    // Sum the point block diagonals of all sub-operators
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in]  op           CeedOperator to assemble
  @param[out] num_entries  Number of entries in coordinate nonzero pattern
  @param[out] rows         Row number for each entry
  @param[out] cols         Column number for each entry

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble via the fallback operator
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // Pass 1: count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  // Caller owns the rows/cols arrays allocated here
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // Pass 2: assemble nonzero locations, each sub-operator writing at its offset
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1735 1736 @param[in] op CeedOperator to assemble 1737 @param[out] values Values to assemble into matrix 1738 1739 @ref User 1740 **/ 1741 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1742 int ierr; 1743 CeedInt num_suboperators, single_entries = 0; 1744 CeedOperator *sub_operators; 1745 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1746 1747 // Use backend version, if available 1748 if (op->LinearAssemble) { 1749 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1750 return CEED_ERROR_SUCCESS; 1751 } else { 1752 // Check for valid fallback resource 1753 const char *resource, *fallback_resource; 1754 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1755 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1756 CeedChk(ierr); 1757 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1758 // Fallback to reference Ceed 1759 if (!op->op_fallback) { 1760 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1761 } 1762 // Assemble 1763 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1764 return CEED_ERROR_SUCCESS; 1765 } 1766 } 1767 1768 // Default interface implementation 1769 bool is_composite; 1770 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1771 1772 CeedInt offset = 0; 1773 if (is_composite) { 1774 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1775 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1776 for (int k = 0; k < num_suboperators; ++k) { 1777 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1778 CeedChk(ierr); 1779 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1780 &single_entries); 1781 CeedChk(ierr); 1782 offset += single_entries; 1783 } 1784 } else { 1785 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1786 } 1787 1788 return CEED_ERROR_SUCCESS; 1789 } 1790 1791 /** 1792 @brief Create a multigrid coarse operator and level 
transfer operators 1793 for a CeedOperator, creating the prolongation basis from the 1794 fine and coarse grid interpolation 1795 1796 Note: Calling this function asserts that setup is complete 1797 and sets the CeedOperator as immutable. 1798 1799 @param[in] op_fine Fine grid operator 1800 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1801 @param[in] rstr_coarse Coarse grid restriction 1802 @param[in] basis_coarse Coarse grid active vector basis 1803 @param[out] op_coarse Coarse grid operator 1804 @param[out] op_prolong Coarse to fine operator 1805 @param[out] op_restrict Fine to coarse operator 1806 1807 @return An error code: 0 - success, otherwise - failure 1808 1809 @ref User 1810 **/ 1811 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1812 CeedVector p_mult_fine, 1813 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1814 CeedOperator *op_coarse, CeedOperator *op_prolong, 1815 CeedOperator *op_restrict) { 1816 int ierr; 1817 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1818 Ceed ceed; 1819 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1820 1821 // Check for compatible quadrature spaces 1822 CeedBasis basis_fine; 1823 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1824 CeedInt Q_f, Q_c; 1825 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1826 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1827 if (Q_f != Q_c) 1828 // LCOV_EXCL_START 1829 return CeedError(ceed, CEED_ERROR_DIMENSION, 1830 "Bases must have compatible quadrature spaces"); 1831 // LCOV_EXCL_STOP 1832 1833 // Coarse to fine basis 1834 CeedInt P_f, P_c, Q = Q_f; 1835 bool is_tensor_f, is_tensor_c; 1836 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1837 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1838 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1839 if (is_tensor_f && is_tensor_c) { 1840 ierr = 
CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1841 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1842 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1843 } else if (!is_tensor_f && !is_tensor_c) { 1844 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1845 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1846 } else { 1847 // LCOV_EXCL_START 1848 return CeedError(ceed, CEED_ERROR_MINOR, 1849 "Bases must both be tensor or non-tensor"); 1850 // LCOV_EXCL_STOP 1851 } 1852 1853 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1854 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1855 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1856 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1857 if (is_tensor_f) { 1858 memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]); 1859 memcpy(interp_c, basis_coarse->interp_1d, 1860 Q*P_c*sizeof basis_coarse->interp_1d[0]); 1861 } else { 1862 memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]); 1863 memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]); 1864 } 1865 1866 // -- QR Factorization, interp_f = Q R 1867 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1868 1869 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1870 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1871 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1872 1873 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1874 for (CeedInt j=0; j<P_c; j++) { // Column j 1875 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1876 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1877 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1878 for (CeedInt k=i+1; k<P_f; k++) 1879 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1880 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1881 } 1882 } 1883 ierr = CeedFree(&tau); CeedChk(ierr); 1884 ierr = 
CeedFree(&interp_c); CeedChk(ierr); 1885 ierr = CeedFree(&interp_f); CeedChk(ierr); 1886 1887 // Complete with interp_c_to_f versions of code 1888 if (is_tensor_f) { 1889 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1890 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1891 CeedChk(ierr); 1892 } else { 1893 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1894 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1895 CeedChk(ierr); 1896 } 1897 1898 // Cleanup 1899 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1900 return CEED_ERROR_SUCCESS; 1901 } 1902 1903 /** 1904 @brief Create a multigrid coarse operator and level transfer operators 1905 for a CeedOperator with a tensor basis for the active basis 1906 1907 Note: Calling this function asserts that setup is complete 1908 and sets the CeedOperator as immutable. 1909 1910 @param[in] op_fine Fine grid operator 1911 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1912 @param[in] rstr_coarse Coarse grid restriction 1913 @param[in] basis_coarse Coarse grid active vector basis 1914 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1915 @param[out] op_coarse Coarse grid operator 1916 @param[out] op_prolong Coarse to fine operator 1917 @param[out] op_restrict Fine to coarse operator 1918 1919 @return An error code: 0 - success, otherwise - failure 1920 1921 @ref User 1922 **/ 1923 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1924 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1925 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1926 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1927 int ierr; 1928 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1929 Ceed ceed; 1930 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1931 1932 // Check for compatible quadrature spaces 1933 CeedBasis basis_fine; 1934 
ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1935 CeedInt Q_f, Q_c; 1936 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1937 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1938 if (Q_f != Q_c) 1939 // LCOV_EXCL_START 1940 return CeedError(ceed, CEED_ERROR_DIMENSION, 1941 "Bases must have compatible quadrature spaces"); 1942 // LCOV_EXCL_STOP 1943 1944 // Coarse to fine basis 1945 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1946 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1947 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1948 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1949 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1950 CeedChk(ierr); 1951 P_1d_c = dim == 1 ? num_nodes_c : 1952 dim == 2 ? sqrt(num_nodes_c) : 1953 cbrt(num_nodes_c); 1954 CeedScalar *q_ref, *q_weight, *grad; 1955 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1956 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1957 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1958 CeedBasis basis_c_to_f; 1959 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1960 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1961 CeedChk(ierr); 1962 ierr = CeedFree(&q_ref); CeedChk(ierr); 1963 ierr = CeedFree(&q_weight); CeedChk(ierr); 1964 ierr = CeedFree(&grad); CeedChk(ierr); 1965 1966 // Core code 1967 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1968 basis_coarse, basis_c_to_f, op_coarse, 1969 op_prolong, op_restrict); 1970 CeedChk(ierr); 1971 return CEED_ERROR_SUCCESS; 1972 } 1973 1974 /** 1975 @brief Create a multigrid coarse operator and level transfer operators 1976 for a CeedOperator with a non-tensor basis for the active vector 1977 1978 Note: Calling this function asserts that setup is complete 1979 and sets the CeedOperator as immutable. 
1980 1981 @param[in] op_fine Fine grid operator 1982 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1983 @param[in] rstr_coarse Coarse grid restriction 1984 @param[in] basis_coarse Coarse grid active vector basis 1985 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1986 @param[out] op_coarse Coarse grid operator 1987 @param[out] op_prolong Coarse to fine operator 1988 @param[out] op_restrict Fine to coarse operator 1989 1990 @return An error code: 0 - success, otherwise - failure 1991 1992 @ref User 1993 **/ 1994 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1995 CeedVector p_mult_fine, 1996 CeedElemRestriction rstr_coarse, 1997 CeedBasis basis_coarse, 1998 const CeedScalar *interp_c_to_f, 1999 CeedOperator *op_coarse, 2000 CeedOperator *op_prolong, 2001 CeedOperator *op_restrict) { 2002 int ierr; 2003 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2004 Ceed ceed; 2005 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2006 2007 // Check for compatible quadrature spaces 2008 CeedBasis basis_fine; 2009 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2010 CeedInt Q_f, Q_c; 2011 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2012 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2013 if (Q_f != Q_c) 2014 // LCOV_EXCL_START 2015 return CeedError(ceed, CEED_ERROR_DIMENSION, 2016 "Bases must have compatible quadrature spaces"); 2017 // LCOV_EXCL_STOP 2018 2019 // Coarse to fine basis 2020 CeedElemTopology topo; 2021 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 2022 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2023 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2024 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2025 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 2026 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2027 
CeedChk(ierr); 2028 CeedScalar *q_ref, *q_weight, *grad; 2029 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 2030 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 2031 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 2032 CeedBasis basis_c_to_f; 2033 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 2034 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2035 CeedChk(ierr); 2036 ierr = CeedFree(&q_ref); CeedChk(ierr); 2037 ierr = CeedFree(&q_weight); CeedChk(ierr); 2038 ierr = CeedFree(&grad); CeedChk(ierr); 2039 2040 // Core code 2041 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2042 basis_coarse, basis_c_to_f, op_coarse, 2043 op_prolong, op_restrict); 2044 CeedChk(ierr); 2045 return CEED_ERROR_SUCCESS; 2046 } 2047 2048 /** 2049 @brief Build a FDM based approximate inverse for each element for a 2050 CeedOperator 2051 2052 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2053 Method based approximate inverse. This function obtains the simultaneous 2054 diagonalization for the 1D mass and Laplacian operators, 2055 M = V^T V, K = V^T S V. 2056 The assembled QFunction is used to modify the eigenvalues from simultaneous 2057 diagonalization and obtain an approximate inverse of the form 2058 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2059 associated CeedQFunction must therefore also be linear. 2060 2061 Note: Calling this function asserts that setup is complete 2062 and sets the CeedOperator as immutable. 
2063 2064 @param op CeedOperator to create element inverses 2065 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2066 for each element 2067 @param request Address of CeedRequest for non-blocking completion, else 2068 @ref CEED_REQUEST_IMMEDIATE 2069 2070 @return An error code: 0 - success, otherwise - failure 2071 2072 @ref User 2073 **/ 2074 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2075 CeedRequest *request) { 2076 int ierr; 2077 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2078 2079 // Use backend version, if available 2080 if (op->CreateFDMElementInverse) { 2081 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2082 return CEED_ERROR_SUCCESS; 2083 } else { 2084 // Check for valid fallback resource 2085 const char *resource, *fallback_resource; 2086 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2087 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2088 CeedChk(ierr); 2089 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2090 // Fallback to reference Ceed 2091 if (!op->op_fallback) { 2092 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2093 } 2094 // Assemble 2095 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2096 CeedChk(ierr); 2097 return CEED_ERROR_SUCCESS; 2098 } 2099 } 2100 2101 // Interface implementation 2102 Ceed ceed, ceed_parent; 2103 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2104 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2105 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2106 CeedQFunction qf; 2107 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2108 2109 // Determine active input basis 2110 bool interp = false, grad = false; 2111 CeedBasis basis = NULL; 2112 CeedElemRestriction rstr = NULL; 2113 CeedOperatorField *op_fields; 2114 CeedQFunctionField *qf_fields; 2115 CeedInt num_input_fields; 2116 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2117 CeedChk(ierr); 2118 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2119 for (CeedInt i=0; i<num_input_fields; i++) { 2120 CeedVector vec; 2121 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2122 if (vec == CEED_VECTOR_ACTIVE) { 2123 CeedEvalMode eval_mode; 2124 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2125 interp = interp || eval_mode == CEED_EVAL_INTERP; 2126 grad = grad || eval_mode == CEED_EVAL_GRAD; 2127 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2128 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2129 } 2130 } 2131 if (!basis) 2132 // LCOV_EXCL_START 2133 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2134 // LCOV_EXCL_STOP 2135 CeedSize l_size = 1; 2136 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2137 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2138 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2139 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2140 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2141 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2142 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2143 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2144 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2145 2146 // Build and diagonalize 1D Mass and Laplacian 2147 
bool tensor_basis; 2148 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2149 if (!tensor_basis) 2150 // LCOV_EXCL_START 2151 return CeedError(ceed, CEED_ERROR_BACKEND, 2152 "FDMElementInverse only supported for tensor " 2153 "bases"); 2154 // LCOV_EXCL_STOP 2155 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2156 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2157 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2158 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2159 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2160 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2161 // -- Build matrices 2162 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2163 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2164 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2165 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2166 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2167 mass, laplace); CeedChk(ierr); 2168 2169 // -- Diagonalize 2170 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2171 CeedChk(ierr); 2172 ierr = CeedFree(&mass); CeedChk(ierr); 2173 ierr = CeedFree(&laplace); CeedChk(ierr); 2174 for (CeedInt i=0; i<P_1d; i++) 2175 for (CeedInt j=0; j<P_1d; j++) 2176 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2177 ierr = CeedFree(&x); CeedChk(ierr); 2178 2179 // Assemble QFunction 2180 CeedVector assembled; 2181 CeedElemRestriction rstr_qf; 2182 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2183 &rstr_qf, request); CeedChk(ierr); 2184 CeedInt layout[3]; 2185 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2186 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2187 CeedScalar max_norm = 0; 2188 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2189 2190 // Calculate element averages 2191 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2192 CeedScalar 
*elem_avg; 2193 const CeedScalar *assembled_array, *q_weight_array; 2194 CeedVector q_weight; 2195 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2196 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2197 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2198 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2199 CeedChk(ierr); 2200 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2201 CeedChk(ierr); 2202 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2203 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2204 for (CeedInt e=0; e<num_elem; e++) { 2205 CeedInt count = 0; 2206 for (CeedInt q=0; q<num_qpts; q++) 2207 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2208 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2209 qf_value_bound) { 2210 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2211 q_weight_array[q]; 2212 count++; 2213 } 2214 if (count) { 2215 elem_avg[e] /= count; 2216 } else { 2217 elem_avg[e] = 1.0; 2218 } 2219 } 2220 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2221 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2222 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2223 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2224 2225 // Build FDM diagonal 2226 CeedVector q_data; 2227 CeedScalar *q_data_array, *fdm_diagonal; 2228 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2229 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2230 for (CeedInt c=0; c<num_comp; c++) 2231 for (CeedInt n=0; n<elem_size; n++) { 2232 if (interp) 2233 fdm_diagonal[c*elem_size + n] = 1.0; 2234 if (grad) 2235 for (CeedInt d=0; d<dim; d++) { 2236 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2237 fdm_diagonal[c*elem_size + n] += lambda[i]; 2238 } 2239 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2240 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2241 } 2242 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2243 CeedChk(ierr); 2244 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2245 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2246 CeedChk(ierr); 2247 for (CeedInt e=0; e<num_elem; e++) 2248 for (CeedInt c=0; c<num_comp; c++) 2249 for (CeedInt n=0; n<elem_size; n++) 2250 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2251 fdm_diagonal[c*elem_size + n]); 2252 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2253 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2254 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2255 2256 // Setup FDM operator 2257 // -- Basis 2258 CeedBasis fdm_basis; 2259 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2260 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2261 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2262 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2263 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2264 fdm_interp, grad_dummy, q_ref_dummy, 2265 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2266 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2267 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2268 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2269 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2270 ierr = CeedFree(&lambda); CeedChk(ierr); 2271 2272 // -- Restriction 2273 CeedElemRestriction rstr_qd_i; 2274 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2275 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2276 num_comp, num_elem*num_comp*elem_size, 2277 strides, &rstr_qd_i); CeedChk(ierr); 2278 // -- QFunction 2279 CeedQFunction qf_fdm; 2280 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2281 CeedChk(ierr); 2282 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2283 
CeedChk(ierr); 2284 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2285 CeedChk(ierr); 2286 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2287 CeedChk(ierr); 2288 // -- QFunction context 2289 CeedInt *num_comp_data; 2290 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2291 num_comp_data[0] = num_comp; 2292 CeedQFunctionContext ctx_fdm; 2293 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2294 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2295 sizeof(*num_comp_data), num_comp_data); 2296 CeedChk(ierr); 2297 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2298 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2299 // -- Operator 2300 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2301 CeedChk(ierr); 2302 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2303 CeedChk(ierr); 2304 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2305 q_data); CeedChk(ierr); 2306 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2307 CeedChk(ierr); 2308 2309 // Cleanup 2310 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2311 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2312 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2313 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2314 2315 return CEED_ERROR_SUCCESS; 2316 } 2317 2318 /// @} 2319