1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedOperator preconditioning interfaces 18 19 /// ---------------------------------------------------------------------------- 20 /// CeedOperator Library Internal Preconditioning Functions 21 /// ---------------------------------------------------------------------------- 22 /// @addtogroup CeedOperatorDeveloper 23 /// @{ 24 25 /** 26 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 27 CeedOperator functionality 28 29 @param op CeedOperator to create fallback for 30 31 @return An error code: 0 - success, otherwise - failure 32 33 @ref Developer 34 **/ 35 int CeedOperatorCreateFallback(CeedOperator op) { 36 int ierr; 37 38 // Fallback Ceed 39 const char *resource, *fallback_resource; 40 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 41 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 42 CeedChk(ierr); 43 if (!strcmp(resource, fallback_resource)) 44 // LCOV_EXCL_START 45 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 46 "Backend %s cannot create an operator" 47 "fallback to resource %s", resource, fallback_resource); 48 // LCOV_EXCL_STOP 49 50 // Fallback Ceed 51 Ceed ceed_ref; 52 if (!op->ceed->op_fallback_ceed) { 53 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 54 ceed_ref->op_fallback_parent = op->ceed; 55 ceed_ref->Error = op->ceed->Error; 56 op->ceed->op_fallback_ceed = ceed_ref; 57 } 58 ceed_ref = op->ceed->op_fallback_ceed; 59 60 // Clone Op 61 CeedOperator op_ref; 62 ierr = 
CeedCalloc(1, &op_ref); CeedChk(ierr); 63 memcpy(op_ref, op, sizeof(*op_ref)); 64 op_ref->data = NULL; 65 op_ref->is_interface_setup = false; 66 op_ref->is_backend_setup = false; 67 op_ref->ceed = ceed_ref; 68 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 69 ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 70 &op_ref->qf_assembled); CeedChk(ierr); 71 op->op_fallback = op_ref; 72 73 // Clone QF 74 CeedQFunction qf_ref; 75 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 76 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 77 qf_ref->data = NULL; 78 qf_ref->ceed = ceed_ref; 79 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 80 op_ref->qf = qf_ref; 81 op->qf_fallback = qf_ref; 82 return CEED_ERROR_SUCCESS; 83 } 84 85 /** 86 @brief Select correct basis matrix pointer based on CeedEvalMode 87 88 @param[in] eval_mode Current basis evaluation mode 89 @param[in] identity Pointer to identity matrix 90 @param[in] interp Pointer to interpolation matrix 91 @param[in] grad Pointer to gradient matrix 92 @param[out] basis_ptr Basis pointer to set 93 94 @ref Developer 95 **/ 96 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 97 const CeedScalar *identity, const CeedScalar *interp, 98 const CeedScalar *grad, const CeedScalar **basis_ptr) { 99 switch (eval_mode) { 100 case CEED_EVAL_NONE: 101 *basis_ptr = identity; 102 break; 103 case CEED_EVAL_INTERP: 104 *basis_ptr = interp; 105 break; 106 case CEED_EVAL_GRAD: 107 *basis_ptr = grad; 108 break; 109 case CEED_EVAL_WEIGHT: 110 case CEED_EVAL_DIV: 111 case CEED_EVAL_CURL: 112 break; // Caught by QF Assembly 113 } 114 } 115 116 /** 117 @brief Create point block restriction for active operator field 118 119 @param[in] rstr Original CeedElemRestriction for active field 120 @param[out] pointblock_rstr Address of the variable where the newly created 121 CeedElemRestriction will be stored 122 123 @return An error code: 0 - success, otherwise - failure 124 125 @ref Developer 126 **/ 127 static 
int CeedOperatorCreateActivePointBlockRestriction( 128 CeedElemRestriction rstr, 129 CeedElemRestriction *pointblock_rstr) { 130 int ierr; 131 Ceed ceed; 132 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 133 const CeedInt *offsets; 134 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 135 CeedChk(ierr); 136 137 // Expand offsets 138 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 139 *pointblock_offsets; 140 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 141 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 142 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 143 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 144 CeedInt shift = num_comp; 145 if (comp_stride != 1) 146 shift *= num_comp; 147 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 148 CeedChk(ierr); 149 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 150 pointblock_offsets[i] = offsets[i]*shift; 151 if (pointblock_offsets[i] > max) 152 max = pointblock_offsets[i]; 153 } 154 155 // Create new restriction 156 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 157 1, max + num_comp*num_comp, CEED_MEM_HOST, 158 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 159 CeedChk(ierr); 160 161 // Cleanup 162 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 163 164 return CEED_ERROR_SUCCESS; 165 } 166 167 /** 168 @brief Core logic for assembling operator diagonal or point block diagonal 169 170 @param[in] op CeedOperator to assemble point block diagonal 171 @param[in] request Address of CeedRequest for non-blocking completion, else 172 CEED_REQUEST_IMMEDIATE 173 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 174 @param[out] assembled CeedVector to store assembled diagonal 175 176 @return An error code: 0 - success, otherwise - failure 177 178 @ref Developer 
179 **/ 180 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 181 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 182 int ierr; 183 Ceed ceed; 184 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 185 186 // Assemble QFunction 187 CeedQFunction qf; 188 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 189 CeedInt num_input_fields, num_output_fields; 190 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 191 CeedChk(ierr); 192 CeedVector assembled_qf; 193 CeedElemRestriction rstr; 194 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 195 &rstr, request); CeedChk(ierr); 196 CeedInt layout[3]; 197 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 198 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 199 CeedScalar max_norm = 0; 200 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 201 202 // Determine active input basis 203 CeedOperatorField *op_fields; 204 CeedQFunctionField *qf_fields; 205 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 206 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 207 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 208 CeedEvalMode *eval_mode_in = NULL; 209 CeedBasis basis_in = NULL; 210 CeedElemRestriction rstr_in = NULL; 211 for (CeedInt i=0; i<num_input_fields; i++) { 212 CeedVector vec; 213 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 214 if (vec == CEED_VECTOR_ACTIVE) { 215 CeedElemRestriction rstr; 216 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 217 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 218 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 219 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 220 if (rstr_in && rstr_in != rstr) 221 // LCOV_EXCL_START 222 return CeedError(ceed, 
CEED_ERROR_BACKEND, 223 "Multi-field non-composite operator diagonal assembly not supported"); 224 // LCOV_EXCL_STOP 225 rstr_in = rstr; 226 CeedEvalMode eval_mode; 227 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 228 switch (eval_mode) { 229 case CEED_EVAL_NONE: 230 case CEED_EVAL_INTERP: 231 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 232 eval_mode_in[num_eval_mode_in] = eval_mode; 233 num_eval_mode_in += 1; 234 break; 235 case CEED_EVAL_GRAD: 236 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 237 for (CeedInt d=0; d<dim; d++) 238 eval_mode_in[num_eval_mode_in+d] = eval_mode; 239 num_eval_mode_in += dim; 240 break; 241 case CEED_EVAL_WEIGHT: 242 case CEED_EVAL_DIV: 243 case CEED_EVAL_CURL: 244 break; // Caught by QF Assembly 245 } 246 } 247 } 248 249 // Determine active output basis 250 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 251 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 252 CeedInt num_eval_mode_out = 0; 253 CeedEvalMode *eval_mode_out = NULL; 254 CeedBasis basis_out = NULL; 255 CeedElemRestriction rstr_out = NULL; 256 for (CeedInt i=0; i<num_output_fields; i++) { 257 CeedVector vec; 258 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 259 if (vec == CEED_VECTOR_ACTIVE) { 260 CeedElemRestriction rstr; 261 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 262 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 263 if (rstr_out && rstr_out != rstr) 264 // LCOV_EXCL_START 265 return CeedError(ceed, CEED_ERROR_BACKEND, 266 "Multi-field non-composite operator diagonal assembly not supported"); 267 // LCOV_EXCL_STOP 268 rstr_out = rstr; 269 CeedEvalMode eval_mode; 270 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 271 switch (eval_mode) { 272 case CEED_EVAL_NONE: 273 case CEED_EVAL_INTERP: 274 ierr = 
CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 275 eval_mode_out[num_eval_mode_out] = eval_mode; 276 num_eval_mode_out += 1; 277 break; 278 case CEED_EVAL_GRAD: 279 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 280 for (CeedInt d=0; d<dim; d++) 281 eval_mode_out[num_eval_mode_out+d] = eval_mode; 282 num_eval_mode_out += dim; 283 break; 284 case CEED_EVAL_WEIGHT: 285 case CEED_EVAL_DIV: 286 case CEED_EVAL_CURL: 287 break; // Caught by QF Assembly 288 } 289 } 290 } 291 292 // Assemble point block diagonal restriction, if needed 293 CeedElemRestriction diag_rstr = rstr_out; 294 if (is_pointblock) { 295 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 296 CeedChk(ierr); 297 } 298 299 // Create diagonal vector 300 CeedVector elem_diag; 301 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 302 CeedChk(ierr); 303 304 // Assemble element operator diagonals 305 CeedScalar *elem_diag_array; 306 const CeedScalar *assembled_qf_array; 307 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 308 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 309 CeedChk(ierr); 310 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 311 CeedChk(ierr); 312 CeedInt num_elem, num_nodes, num_qpts; 313 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 314 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 315 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 316 // Basis matrices 317 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 318 CeedScalar *identity = NULL; 319 bool evalNone = false; 320 for (CeedInt i=0; i<num_eval_mode_in; i++) 321 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 322 for (CeedInt i=0; i<num_eval_mode_out; i++) 323 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 324 if (evalNone) { 325 ierr = 
CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 326 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 327 identity[i*num_nodes+i] = 1.0; 328 } 329 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 330 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 331 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 332 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 333 // Compute the diagonal of B^T D B 334 // Each element 335 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 336 for (CeedInt e=0; e<num_elem; e++) { 337 CeedInt d_out = -1; 338 // Each basis eval mode pair 339 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 340 const CeedScalar *bt = NULL; 341 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 342 d_out += 1; 343 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 344 &grad_out[d_out*num_qpts*num_nodes], &bt); 345 CeedInt d_in = -1; 346 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 347 const CeedScalar *b = NULL; 348 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 349 d_in += 1; 350 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 351 &grad_in[d_in*num_qpts*num_nodes], &b); 352 // Each component 353 for (CeedInt c_out=0; c_out<num_comp; c_out++) 354 // Each qpoint/node pair 355 for (CeedInt q=0; q<num_qpts; q++) 356 if (is_pointblock) { 357 // Point Block Diagonal 358 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 359 const CeedScalar qf_value = 360 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 361 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 362 if (fabs(qf_value) > qf_value_bound) 363 for (CeedInt n=0; n<num_nodes; n++) 364 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 365 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 366 } 367 } else { 368 // Diagonal Only 369 const CeedScalar qf_value = 370 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)* 371 
num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 372 if (fabs(qf_value) > qf_value_bound) 373 for (CeedInt n=0; n<num_nodes; n++) 374 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 375 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 376 } 377 } 378 } 379 } 380 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 381 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 382 CeedChk(ierr); 383 384 // Assemble local operator diagonal 385 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 386 assembled, request); CeedChk(ierr); 387 388 // Cleanup 389 if (is_pointblock) { 390 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 391 } 392 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 393 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 394 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 395 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 396 ierr = CeedFree(&identity); CeedChk(ierr); 397 398 return CEED_ERROR_SUCCESS; 399 } 400 401 /** 402 @brief Core logic for assembling composite operator diagonal 403 404 @param[in] op CeedOperator to assemble point block diagonal 405 @param[in] request Address of CeedRequest for non-blocking completion, else 406 CEED_REQUEST_IMMEDIATE 407 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 408 @param[out] assembled CeedVector to store assembled diagonal 409 410 @return An error code: 0 - success, otherwise - failure 411 412 @ref Developer 413 **/ 414 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 415 CeedOperator op, CeedRequest *request, const bool is_pointblock, 416 CeedVector assembled) { 417 int ierr; 418 CeedInt num_sub; 419 CeedOperator *suboperators; 420 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 421 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 422 for (CeedInt i = 0; i < num_sub; i++) { 423 ierr = 
CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 424 is_pointblock, assembled); CeedChk(ierr); 425 } 426 return CEED_ERROR_SUCCESS; 427 } 428 429 /** 430 @brief Build nonzero pattern for non-composite operator 431 432 Users should generally use CeedOperatorLinearAssembleSymbolic() 433 434 @param[in] op CeedOperator to assemble nonzero pattern 435 @param[in] offset Offset for number of entries 436 @param[out] rows Row number for each entry 437 @param[out] cols Column number for each entry 438 439 @return An error code: 0 - success, otherwise - failure 440 441 @ref Developer 442 **/ 443 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 444 CeedInt *rows, CeedInt *cols) { 445 int ierr; 446 Ceed ceed = op->ceed; 447 if (op->is_composite) 448 // LCOV_EXCL_START 449 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 450 "Composite operator not supported"); 451 // LCOV_EXCL_STOP 452 453 CeedElemRestriction rstr_in; 454 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 455 CeedSize num_nodes; 456 CeedInt num_elem, elem_size, num_comp; 457 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 458 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 459 ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr); 460 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 461 CeedInt layout_er[3]; 462 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 463 464 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 465 466 // Determine elem_dof relation 467 CeedVector index_vec; 468 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 469 CeedScalar *array; 470 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 471 for (CeedInt i = 0; i < num_nodes; ++i) { 472 array[i] = i; 473 } 474 ierr = CeedVectorRestoreArray(index_vec, &array); 
CeedChk(ierr); 475 CeedVector elem_dof; 476 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 477 CeedChk(ierr); 478 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 479 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 480 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 481 const CeedScalar *elem_dof_a; 482 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 483 CeedChk(ierr); 484 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 485 486 // Determine i, j locations for element matrices 487 CeedInt count = 0; 488 for (int e = 0; e < num_elem; ++e) { 489 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 490 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 491 for (int i = 0; i < elem_size; ++i) { 492 for (int j = 0; j < elem_size; ++j) { 493 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 494 (comp_out)*layout_er[1] + e*layout_er[2]; 495 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 496 (comp_in)*layout_er[1] + e*layout_er[2]; 497 498 const CeedInt row = elem_dof_a[elem_dof_index_row]; 499 const CeedInt col = elem_dof_a[elem_dof_index_col]; 500 501 rows[offset + count] = row; 502 cols[offset + count] = col; 503 count++; 504 } 505 } 506 } 507 } 508 } 509 if (count != local_num_entries) 510 // LCOV_EXCL_START 511 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 512 // LCOV_EXCL_STOP 513 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 514 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 515 516 return CEED_ERROR_SUCCESS; 517 } 518 519 /** 520 @brief Assemble nonzero entries for non-composite operator 521 522 Users should generally use CeedOperatorLinearAssemble() 523 524 @param[in] op CeedOperator to assemble 525 @param[out] offset Offest for number of entries 526 @param[out] values Values to assemble into matrix 527 528 @return An error code: 0 - success, otherwise - failure 529 530 @ref Developer 531 **/ 532 static 
int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 533 CeedVector values) { 534 int ierr; 535 Ceed ceed = op->ceed; 536 if (op->is_composite) 537 // LCOV_EXCL_START 538 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 539 "Composite operator not supported"); 540 // LCOV_EXCL_STOP 541 542 // Assemble QFunction 543 CeedQFunction qf; 544 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 545 CeedVector assembled_qf; 546 CeedElemRestriction rstr_q; 547 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 548 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 549 550 CeedSize qf_length; 551 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 552 553 CeedInt num_input_fields, num_output_fields; 554 CeedOperatorField *input_fields; 555 CeedOperatorField *output_fields; 556 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 557 &num_output_fields, &output_fields); CeedChk(ierr); 558 559 // Determine active input basis 560 CeedQFunctionField *qf_fields; 561 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 562 CeedInt num_eval_mode_in = 0, dim = 1; 563 CeedEvalMode *eval_mode_in = NULL; 564 CeedBasis basis_in = NULL; 565 CeedElemRestriction rstr_in = NULL; 566 for (CeedInt i=0; i<num_input_fields; i++) { 567 CeedVector vec; 568 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 569 if (vec == CEED_VECTOR_ACTIVE) { 570 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 571 CeedChk(ierr); 572 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 573 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 574 CeedChk(ierr); 575 CeedEvalMode eval_mode; 576 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 577 CeedChk(ierr); 578 switch (eval_mode) { 579 case CEED_EVAL_NONE: 580 case CEED_EVAL_INTERP: 581 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 582 eval_mode_in[num_eval_mode_in] = 
eval_mode; 583 num_eval_mode_in += 1; 584 break; 585 case CEED_EVAL_GRAD: 586 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 587 for (CeedInt d=0; d<dim; d++) { 588 eval_mode_in[num_eval_mode_in+d] = eval_mode; 589 } 590 num_eval_mode_in += dim; 591 break; 592 case CEED_EVAL_WEIGHT: 593 case CEED_EVAL_DIV: 594 case CEED_EVAL_CURL: 595 break; // Caught by QF Assembly 596 } 597 } 598 } 599 600 // Determine active output basis 601 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 602 CeedInt num_eval_mode_out = 0; 603 CeedEvalMode *eval_mode_out = NULL; 604 CeedBasis basis_out = NULL; 605 CeedElemRestriction rstr_out = NULL; 606 for (CeedInt i=0; i<num_output_fields; i++) { 607 CeedVector vec; 608 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 609 if (vec == CEED_VECTOR_ACTIVE) { 610 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 611 CeedChk(ierr); 612 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 613 CeedChk(ierr); 614 CeedEvalMode eval_mode; 615 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 616 CeedChk(ierr); 617 switch (eval_mode) { 618 case CEED_EVAL_NONE: 619 case CEED_EVAL_INTERP: 620 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 621 eval_mode_out[num_eval_mode_out] = eval_mode; 622 num_eval_mode_out += 1; 623 break; 624 case CEED_EVAL_GRAD: 625 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 626 for (CeedInt d=0; d<dim; d++) { 627 eval_mode_out[num_eval_mode_out+d] = eval_mode; 628 } 629 num_eval_mode_out += dim; 630 break; 631 case CEED_EVAL_WEIGHT: 632 case CEED_EVAL_DIV: 633 case CEED_EVAL_CURL: 634 break; // Caught by QF Assembly 635 } 636 } 637 } 638 639 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 640 // LCOV_EXCL_START 641 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 642 "Cannot assemble operator with out inputs/outputs"); 643 // LCOV_EXCL_STOP 
644 645 CeedInt num_elem, elem_size, num_qpts, num_comp; 646 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 647 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 648 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 649 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 650 651 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 652 653 // loop over elements and put in data structure 654 const CeedScalar *interp_in, *grad_in; 655 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 656 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 657 658 const CeedScalar *assembled_qf_array; 659 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 660 CeedChk(ierr); 661 662 CeedInt layout_qf[3]; 663 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 664 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 665 666 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 667 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 668 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 669 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 670 num_qpts]; // logically 3-tensor 671 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 672 CeedScalar elem_mat[elem_size * elem_size]; 673 int count = 0; 674 CeedScalar *vals; 675 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 676 for (int e = 0; e < num_elem; ++e) { 677 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 678 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 679 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 680 B_mat_in[ell] = 0.0; 681 } 682 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 683 B_mat_out[ell] = 0.0; 684 } 685 // Store block-diagonal D matrix as collection of 
small dense blocks 686 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 687 D_mat[ell] = 0.0; 688 } 689 // form element matrix itself (for each block component) 690 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 691 elem_mat[ell] = 0.0; 692 } 693 for (int q = 0; q < num_qpts; ++q) { 694 for (int n = 0; n < elem_size; ++n) { 695 CeedInt d_in = -1; 696 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 697 const int qq = num_eval_mode_in*q; 698 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 699 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 700 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 701 d_in += 1; 702 B_mat_in[(qq+e_in)*elem_size + n] += 703 grad_in[(d_in*num_qpts+q) * elem_size + n]; 704 } else { 705 // LCOV_EXCL_START 706 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 707 // LCOV_EXCL_STOP 708 } 709 } 710 CeedInt d_out = -1; 711 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 712 const int qq = num_eval_mode_out*q; 713 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 714 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 715 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 716 d_out += 1; 717 B_mat_out[(qq+e_out)*elem_size + n] += 718 grad_in[(d_out*num_qpts+q) * elem_size + n]; 719 } else { 720 // LCOV_EXCL_START 721 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 722 // LCOV_EXCL_STOP 723 } 724 } 725 } 726 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 727 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 728 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 729 +comp_out; 730 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 731 e*layout_qf[2]; 732 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 733 } 734 } 735 } 736 // Compute B^T*D 737 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 738 BTD[ell] = 0.0; 739 } 740 for (int j = 
0; j<elem_size; ++j) { 741 for (int q = 0; q<num_qpts; ++q) { 742 int qq = num_eval_mode_out*q; 743 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 744 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 745 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 746 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 747 } 748 } 749 } 750 } 751 752 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 753 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 754 755 // put element matrix in coordinate data structure 756 for (int i = 0; i < elem_size; ++i) { 757 for (int j = 0; j < elem_size; ++j) { 758 vals[offset + count] = elem_mat[i*elem_size + j]; 759 count++; 760 } 761 } 762 } 763 } 764 } 765 if (count != local_num_entries) 766 // LCOV_EXCL_START 767 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 768 // LCOV_EXCL_STOP 769 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 770 771 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 772 CeedChk(ierr); 773 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 774 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 775 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 776 777 return CEED_ERROR_SUCCESS; 778 } 779 780 /** 781 @brief Count number of entries for assembled CeedOperator 782 783 @param[in] op CeedOperator to assemble 784 @param[out] num_entries Number of entries in assembled representation 785 786 @return An error code: 0 - success, otherwise - failure 787 788 @ref Utility 789 **/ 790 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 791 CeedInt *num_entries) { 792 int ierr; 793 CeedElemRestriction rstr; 794 CeedInt num_elem, elem_size, num_comp; 795 796 if (op->is_composite) 797 // LCOV_EXCL_START 798 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 799 "Composite operator not supported"); 800 // LCOV_EXCL_STOP 801 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 802 ierr = 
CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  // Each element contributes a dense (elem_size*num_comp)^2 block of entries
  *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Common code for creating a multigrid coarse operator and level
           transfer operators for a CeedOperator

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] basis_c_to_f  Basis for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields; only the active field swaps to the coarse
  //      restriction and basis
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone QFunctionAssemblyData
  ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
         &(*op_coarse)->qf_assembled); CeedChk(ierr);

  // Multiplicity vector
  // Gather/scatter p_mult_fine through the fine restriction, then take the
  //   reciprocal so the transfer operators can undo the multiplicity of
  //   shared dofs via the "scale" input below
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction: "Scale" QFunction multiplies the fine input by 1/multiplicity,
  //   and the output field applies basis_c_to_f (transposed) to reach the
  //   coarse space
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation: interpolate coarse input through basis_c_to_f, then scale
  //   the fine output by 1/multiplicity
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup; the operators created above hold their own references
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build 1D mass matrix and Laplacian with perturbation

  @param[in] interp_1d    Interpolation matrix in one dimension
  @param[in] grad_1d      Gradient matrix in one dimension
  @param[in] q_weight_1d  Quadrature weights in one dimension
  @param[in] P_1d         Number of basis nodes in one dimension
  @param[in] Q_1d         Number of quadrature points in one dimension
  @param[in] dim          Dimension of basis
  @param[out] mass        Assembled mass matrix in one dimension
  @param[out] laplace     Assembled perturbed Laplacian in one dimension

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass: M_ij = sum_k interp(k,i) w(k) interp(k,j)
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian: L_ij = sum_k grad(k,i) w(k) grad(k,j)
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Perturb the diagonal so the 1D Laplacian is not singular
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Backend API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorBackend
/// @{

/**
  @brief Create object holding CeedQFunction assembly data for CeedOperator

  @param[in] ceed   A Ceed object where the CeedQFunctionAssemblyData will be created
  @param[out] data  Address of the variable where the newly created
                      CeedQFunctionAssemblyData will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataCreate(Ceed ceed,
                                    CeedQFunctionAssemblyData *data) {
  int ierr;

  ierr = CeedCalloc(1, data); CeedChk(ierr);
  (*data)->ref_count = 1;
  (*data)->ceed = ceed;
  // Hold a reference to the Ceed for the lifetime of this data
  ierr = CeedReference(ceed); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Increment the reference counter for a CeedQFunctionAssemblyData

  @param data  CeedQFunctionAssemblyData to increment the reference counter

  @return An error
code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
  data->ref_count++;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set re-use of CeedQFunctionAssemblyData

  @param data        CeedQFunctionAssemblyData to mark for reuse
  @param reuse_data  Boolean flag indicating data re-use

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data,
                                      bool reuse_data) {
  data->reuse_data = reuse_data;
  // Newly marked data still requires one assembly pass before reuse
  data->needs_data_update = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Mark QFunctionAssemblyData as stale

  @param data               CeedQFunctionAssemblyData to mark as stale
  @param needs_data_update  Boolean flag indicating if update is needed or completed

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,
    bool needs_data_update) {
  data->needs_data_update = needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Determine if QFunctionAssemblyData needs update

  @param[in] data               CeedQFunctionAssemblyData to check
  @param[out] is_update_needed  Boolean flag indicating if re-assembly is required

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
    bool *is_update_needed) {
  // Data not marked for reuse is always reassembled
  *is_update_needed = !data->reuse_data || data->needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Copy the pointer to a CeedQFunctionAssemblyData.
Both pointers should
           be destroyed with `CeedQFunctionAssemblyDataDestroy()`;
           Note: If `*data_copy` is non-NULL, then it is assumed that
           `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
           CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
           the only reference to this CeedQFunctionAssemblyData.

  @param data            CeedQFunctionAssemblyData to copy reference to
  @param[out] data_copy  Variable to store copied reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
    CeedQFunctionAssemblyData *data_copy) {
  int ierr;

  // Take the new reference before releasing any existing *data_copy
  ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
  ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
  *data_copy = data;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get setup status for internal objects for CeedQFunctionAssemblyData

  @param[in] data       CeedQFunctionAssemblyData to retrieve status
  @param[out] is_setup  Boolean flag for setup status

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
                                     bool *is_setup) {
  *is_setup = data->is_setup;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set internal objects for CeedQFunctionAssemblyData

  @param[in] data  CeedQFunctionAssemblyData to set objects
  @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
  @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector vec, CeedElemRestriction rstr) {
  int ierr;

  ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);

  data->is_setup = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get internal objects for CeedQFunctionAssemblyData; copied references
           are returned via CeedVectorReferenceCopy/CeedElemRestrictionReferenceCopy

  @param[in] data   CeedQFunctionAssemblyData to get objects from
  @param[out] vec   Variable to store CeedVector reference
  @param[out] rstr  Variable to store CeedElemRestriction reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector *vec, CeedElemRestriction *rstr) {
  int ierr;

  if (!data->is_setup)
    // LCOV_EXCL_START
    return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
                     "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
  // LCOV_EXCL_STOP

  ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Destroy CeedQFunctionAssemblyData

  @param[out] data  CeedQFunctionAssemblyData to destroy

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
  int ierr;

  // No-op for NULL data; only free once the last reference is released
  if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;

  ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
  ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);

  ierr = CeedFree(data); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
    For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
    'v', provided in that order, would result in an assembled QFunction that
    consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
    on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedQFunction at
                           quadrature points
  @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
                           CeedQFunction
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
                                        CeedElemRestriction *rstr,
                                        CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunction) {
    ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
    CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
           rstr, request); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble CeedQFunction and store
result internally. Return copied
           references of stored data to the caller. Caller is responsible for
           ownership and destruction of the copied references. See also
           @ref CeedOperatorLinearAssembleQFunction

  @param op         CeedOperator to assemble CeedQFunction
  @param assembled  CeedVector to store assembled CeedQFunction at
                      quadrature points
  @param rstr       CeedElemRestriction for CeedVector containing assembled
                      CeedQFunction
  @param request    Address of CeedRequest for non-blocking completion, else
                      @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    bool qf_assembled_is_setup;
    CeedVector assembled_vec = NULL;
    CeedElemRestriction assembled_rstr = NULL;

    ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
                                            &qf_assembled_is_setup); CeedChk(ierr);
    if (qf_assembled_is_setup) {
      // Re-use cached assembly objects; update values only if marked stale
      ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
             &assembled_rstr); CeedChk(ierr);

      bool update_needed;
      ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
             &update_needed); CeedChk(ierr);
      if (update_needed) {
        ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
               request); CeedChk(ierr);
      }
    } else {
      // First assembly; build and cache the internal objects
      ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
             request); CeedChk(ierr);
      ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
             assembled_rstr); CeedChk(ierr);
    }
    ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
    CeedChk(ierr);

    // Copy reference to internally held copy
    // NULL the outputs first so ReferenceCopy does not destroy caller garbage
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
    ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
    ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Backend only provides the additive version; zero the output first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.
  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
    CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    // false => full diagonal (not point block)
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Backend only provides the additive version; zero the output first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.
  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    // true => point block diagonal
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
  @param[in] op            CeedOperator to assemble
  @param[out] num_entries  Number of entries in coordinate nonzero pattern
  @param[out] rows         Row number for each entry
  @param[out] cols         Column number for each entry

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // Pass 1: count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // Pass 2: assemble nonzero locations; each sub-operator fills its own
  //   window of the rows/cols arrays starting at offset
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
  @param[in] op       CeedOperator to assemble
  @param[out] values  Values to assemble into matrix

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // Values land in the same per-sub-operator windows used by
  //   CeedOperatorLinearAssembleSymbolic()
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level
transfer operators 1761 for a CeedOperator, creating the prolongation basis from the 1762 fine and coarse grid interpolation 1763 1764 Note: Calling this function asserts that setup is complete 1765 and sets the CeedOperator as immutable. 1766 1767 @param[in] op_fine Fine grid operator 1768 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1769 @param[in] rstr_coarse Coarse grid restriction 1770 @param[in] basis_coarse Coarse grid active vector basis 1771 @param[out] op_coarse Coarse grid operator 1772 @param[out] op_prolong Coarse to fine operator 1773 @param[out] op_restrict Fine to coarse operator 1774 1775 @return An error code: 0 - success, otherwise - failure 1776 1777 @ref User 1778 **/ 1779 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1780 CeedVector p_mult_fine, 1781 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1782 CeedOperator *op_coarse, CeedOperator *op_prolong, 1783 CeedOperator *op_restrict) { 1784 int ierr; 1785 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1786 Ceed ceed; 1787 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1788 1789 // Check for compatible quadrature spaces 1790 CeedBasis basis_fine; 1791 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1792 CeedInt Q_f, Q_c; 1793 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1794 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1795 if (Q_f != Q_c) 1796 // LCOV_EXCL_START 1797 return CeedError(ceed, CEED_ERROR_DIMENSION, 1798 "Bases must have compatible quadrature spaces"); 1799 // LCOV_EXCL_STOP 1800 1801 // Coarse to fine basis 1802 CeedInt P_f, P_c, Q = Q_f; 1803 bool is_tensor_f, is_tensor_c; 1804 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1805 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1806 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1807 if (is_tensor_f && is_tensor_c) { 1808 ierr = 
CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1809 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1810 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1811 } else if (!is_tensor_f && !is_tensor_c) { 1812 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1813 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1814 } else { 1815 // LCOV_EXCL_START 1816 return CeedError(ceed, CEED_ERROR_MINOR, 1817 "Bases must both be tensor or non-tensor"); 1818 // LCOV_EXCL_STOP 1819 } 1820 1821 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1822 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1823 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1824 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1825 if (is_tensor_f) { 1826 memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]); 1827 memcpy(interp_c, basis_coarse->interp_1d, 1828 Q*P_c*sizeof basis_coarse->interp_1d[0]); 1829 } else { 1830 memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]); 1831 memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]); 1832 } 1833 1834 // -- QR Factorization, interp_f = Q R 1835 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1836 1837 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1838 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1839 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1840 1841 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1842 for (CeedInt j=0; j<P_c; j++) { // Column j 1843 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1844 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1845 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1846 for (CeedInt k=i+1; k<P_f; k++) 1847 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1848 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1849 } 1850 } 1851 ierr = CeedFree(&tau); CeedChk(ierr); 1852 ierr = 
CeedFree(&interp_c); CeedChk(ierr); 1853 ierr = CeedFree(&interp_f); CeedChk(ierr); 1854 1855 // Complete with interp_c_to_f versions of code 1856 if (is_tensor_f) { 1857 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1858 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1859 CeedChk(ierr); 1860 } else { 1861 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1862 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1863 CeedChk(ierr); 1864 } 1865 1866 // Cleanup 1867 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1868 return CEED_ERROR_SUCCESS; 1869 } 1870 1871 /** 1872 @brief Create a multigrid coarse operator and level transfer operators 1873 for a CeedOperator with a tensor basis for the active basis 1874 1875 Note: Calling this function asserts that setup is complete 1876 and sets the CeedOperator as immutable. 1877 1878 @param[in] op_fine Fine grid operator 1879 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1880 @param[in] rstr_coarse Coarse grid restriction 1881 @param[in] basis_coarse Coarse grid active vector basis 1882 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1883 @param[out] op_coarse Coarse grid operator 1884 @param[out] op_prolong Coarse to fine operator 1885 @param[out] op_restrict Fine to coarse operator 1886 1887 @return An error code: 0 - success, otherwise - failure 1888 1889 @ref User 1890 **/ 1891 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1892 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1893 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1894 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1895 int ierr; 1896 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1897 Ceed ceed; 1898 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1899 1900 // Check for compatible quadrature spaces 1901 CeedBasis basis_fine; 1902 
ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1903 CeedInt Q_f, Q_c; 1904 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1905 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1906 if (Q_f != Q_c) 1907 // LCOV_EXCL_START 1908 return CeedError(ceed, CEED_ERROR_DIMENSION, 1909 "Bases must have compatible quadrature spaces"); 1910 // LCOV_EXCL_STOP 1911 1912 // Coarse to fine basis 1913 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1914 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1915 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1916 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1917 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1918 CeedChk(ierr); 1919 P_1d_c = dim == 1 ? num_nodes_c : 1920 dim == 2 ? sqrt(num_nodes_c) : 1921 cbrt(num_nodes_c); 1922 CeedScalar *q_ref, *q_weight, *grad; 1923 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1924 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1925 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1926 CeedBasis basis_c_to_f; 1927 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1928 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1929 CeedChk(ierr); 1930 ierr = CeedFree(&q_ref); CeedChk(ierr); 1931 ierr = CeedFree(&q_weight); CeedChk(ierr); 1932 ierr = CeedFree(&grad); CeedChk(ierr); 1933 1934 // Core code 1935 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1936 basis_coarse, basis_c_to_f, op_coarse, 1937 op_prolong, op_restrict); 1938 CeedChk(ierr); 1939 return CEED_ERROR_SUCCESS; 1940 } 1941 1942 /** 1943 @brief Create a multigrid coarse operator and level transfer operators 1944 for a CeedOperator with a non-tensor basis for the active vector 1945 1946 Note: Calling this function asserts that setup is complete 1947 and sets the CeedOperator as immutable. 
1948 1949 @param[in] op_fine Fine grid operator 1950 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1951 @param[in] rstr_coarse Coarse grid restriction 1952 @param[in] basis_coarse Coarse grid active vector basis 1953 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1954 @param[out] op_coarse Coarse grid operator 1955 @param[out] op_prolong Coarse to fine operator 1956 @param[out] op_restrict Fine to coarse operator 1957 1958 @return An error code: 0 - success, otherwise - failure 1959 1960 @ref User 1961 **/ 1962 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 1963 CeedVector p_mult_fine, 1964 CeedElemRestriction rstr_coarse, 1965 CeedBasis basis_coarse, 1966 const CeedScalar *interp_c_to_f, 1967 CeedOperator *op_coarse, 1968 CeedOperator *op_prolong, 1969 CeedOperator *op_restrict) { 1970 int ierr; 1971 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1972 Ceed ceed; 1973 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1974 1975 // Check for compatible quadrature spaces 1976 CeedBasis basis_fine; 1977 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1978 CeedInt Q_f, Q_c; 1979 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1980 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1981 if (Q_f != Q_c) 1982 // LCOV_EXCL_START 1983 return CeedError(ceed, CEED_ERROR_DIMENSION, 1984 "Bases must have compatible quadrature spaces"); 1985 // LCOV_EXCL_STOP 1986 1987 // Coarse to fine basis 1988 CeedElemTopology topo; 1989 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 1990 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 1991 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1992 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1993 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 1994 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1995 
CeedChk(ierr); 1996 CeedScalar *q_ref, *q_weight, *grad; 1997 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 1998 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 1999 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 2000 CeedBasis basis_c_to_f; 2001 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 2002 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2003 CeedChk(ierr); 2004 ierr = CeedFree(&q_ref); CeedChk(ierr); 2005 ierr = CeedFree(&q_weight); CeedChk(ierr); 2006 ierr = CeedFree(&grad); CeedChk(ierr); 2007 2008 // Core code 2009 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2010 basis_coarse, basis_c_to_f, op_coarse, 2011 op_prolong, op_restrict); 2012 CeedChk(ierr); 2013 return CEED_ERROR_SUCCESS; 2014 } 2015 2016 /** 2017 @brief Build a FDM based approximate inverse for each element for a 2018 CeedOperator 2019 2020 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2021 Method based approximate inverse. This function obtains the simultaneous 2022 diagonalization for the 1D mass and Laplacian operators, 2023 M = V^T V, K = V^T S V. 2024 The assembled QFunction is used to modify the eigenvalues from simultaneous 2025 diagonalization and obtain an approximate inverse of the form 2026 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2027 associated CeedQFunction must therefore also be linear. 2028 2029 Note: Calling this function asserts that setup is complete 2030 and sets the CeedOperator as immutable. 
2031 2032 @param op CeedOperator to create element inverses 2033 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2034 for each element 2035 @param request Address of CeedRequest for non-blocking completion, else 2036 @ref CEED_REQUEST_IMMEDIATE 2037 2038 @return An error code: 0 - success, otherwise - failure 2039 2040 @ref User 2041 **/ 2042 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2043 CeedRequest *request) { 2044 int ierr; 2045 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2046 2047 // Use backend version, if available 2048 if (op->CreateFDMElementInverse) { 2049 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2050 return CEED_ERROR_SUCCESS; 2051 } else { 2052 // Check for valid fallback resource 2053 const char *resource, *fallback_resource; 2054 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2055 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2056 CeedChk(ierr); 2057 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2058 // Fallback to reference Ceed 2059 if (!op->op_fallback) { 2060 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2061 } 2062 // Assemble 2063 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2064 CeedChk(ierr); 2065 return CEED_ERROR_SUCCESS; 2066 } 2067 } 2068 2069 // Interface implementation 2070 Ceed ceed, ceed_parent; 2071 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2072 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2073 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2074 CeedQFunction qf; 2075 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2076 2077 // Determine active input basis 2078 bool interp = false, grad = false; 2079 CeedBasis basis = NULL; 2080 CeedElemRestriction rstr = NULL; 2081 CeedOperatorField *op_fields; 2082 CeedQFunctionField *qf_fields; 2083 CeedInt num_input_fields; 2084 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2085 CeedChk(ierr); 2086 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2087 for (CeedInt i=0; i<num_input_fields; i++) { 2088 CeedVector vec; 2089 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2090 if (vec == CEED_VECTOR_ACTIVE) { 2091 CeedEvalMode eval_mode; 2092 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2093 interp = interp || eval_mode == CEED_EVAL_INTERP; 2094 grad = grad || eval_mode == CEED_EVAL_GRAD; 2095 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2096 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2097 } 2098 } 2099 if (!basis) 2100 // LCOV_EXCL_START 2101 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2102 // LCOV_EXCL_STOP 2103 CeedSize l_size = 1; 2104 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2105 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2106 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2107 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2108 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2109 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2110 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2111 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2112 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2113 2114 // Build and diagonalize 1D Mass and Laplacian 2115 
bool tensor_basis; 2116 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2117 if (!tensor_basis) 2118 // LCOV_EXCL_START 2119 return CeedError(ceed, CEED_ERROR_BACKEND, 2120 "FDMElementInverse only supported for tensor " 2121 "bases"); 2122 // LCOV_EXCL_STOP 2123 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2124 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2125 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2126 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2127 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2128 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2129 // -- Build matrices 2130 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2131 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2132 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2133 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2134 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2135 mass, laplace); CeedChk(ierr); 2136 2137 // -- Diagonalize 2138 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2139 CeedChk(ierr); 2140 ierr = CeedFree(&mass); CeedChk(ierr); 2141 ierr = CeedFree(&laplace); CeedChk(ierr); 2142 for (CeedInt i=0; i<P_1d; i++) 2143 for (CeedInt j=0; j<P_1d; j++) 2144 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2145 ierr = CeedFree(&x); CeedChk(ierr); 2146 2147 // Assemble QFunction 2148 CeedVector assembled; 2149 CeedElemRestriction rstr_qf; 2150 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2151 &rstr_qf, request); CeedChk(ierr); 2152 CeedInt layout[3]; 2153 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2154 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2155 CeedScalar max_norm = 0; 2156 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2157 2158 // Calculate element averages 2159 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2160 CeedScalar 
*elem_avg; 2161 const CeedScalar *assembled_array, *q_weight_array; 2162 CeedVector q_weight; 2163 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2164 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2165 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2166 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2167 CeedChk(ierr); 2168 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2169 CeedChk(ierr); 2170 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2171 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2172 for (CeedInt e=0; e<num_elem; e++) { 2173 CeedInt count = 0; 2174 for (CeedInt q=0; q<num_qpts; q++) 2175 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2176 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2177 qf_value_bound) { 2178 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2179 q_weight_array[q]; 2180 count++; 2181 } 2182 if (count) { 2183 elem_avg[e] /= count; 2184 } else { 2185 elem_avg[e] = 1.0; 2186 } 2187 } 2188 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2189 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2190 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2191 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2192 2193 // Build FDM diagonal 2194 CeedVector q_data; 2195 CeedScalar *q_data_array, *fdm_diagonal; 2196 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2197 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2198 for (CeedInt c=0; c<num_comp; c++) 2199 for (CeedInt n=0; n<elem_size; n++) { 2200 if (interp) 2201 fdm_diagonal[c*elem_size + n] = 1.0; 2202 if (grad) 2203 for (CeedInt d=0; d<dim; d++) { 2204 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2205 fdm_diagonal[c*elem_size + n] += lambda[i]; 2206 } 2207 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2208 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2209 } 2210 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2211 CeedChk(ierr); 2212 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2213 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2214 CeedChk(ierr); 2215 for (CeedInt e=0; e<num_elem; e++) 2216 for (CeedInt c=0; c<num_comp; c++) 2217 for (CeedInt n=0; n<elem_size; n++) 2218 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2219 fdm_diagonal[c*elem_size + n]); 2220 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2221 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2222 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2223 2224 // Setup FDM operator 2225 // -- Basis 2226 CeedBasis fdm_basis; 2227 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2228 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2229 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2230 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2231 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2232 fdm_interp, grad_dummy, q_ref_dummy, 2233 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2234 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2235 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2236 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2237 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2238 ierr = CeedFree(&lambda); CeedChk(ierr); 2239 2240 // -- Restriction 2241 CeedElemRestriction rstr_qd_i; 2242 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2243 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2244 num_comp, num_elem*num_comp*elem_size, 2245 strides, &rstr_qd_i); CeedChk(ierr); 2246 // -- QFunction 2247 CeedQFunction qf_fdm; 2248 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2249 CeedChk(ierr); 2250 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2251 
CeedChk(ierr); 2252 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2253 CeedChk(ierr); 2254 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2255 CeedChk(ierr); 2256 // -- QFunction context 2257 CeedInt *num_comp_data; 2258 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2259 num_comp_data[0] = num_comp; 2260 CeedQFunctionContext ctx_fdm; 2261 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2262 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2263 sizeof(*num_comp_data), num_comp_data); 2264 CeedChk(ierr); 2265 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2266 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2267 // -- Operator 2268 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2269 CeedChk(ierr); 2270 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2271 CeedChk(ierr); 2272 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2273 q_data); CeedChk(ierr); 2274 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2275 CeedChk(ierr); 2276 2277 // Cleanup 2278 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2279 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2280 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2281 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2282 2283 return CEED_ERROR_SUCCESS; 2284 } 2285 2286 /// @} 2287