1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedOperator preconditioning interfaces 18 19 /// ---------------------------------------------------------------------------- 20 /// CeedOperator Library Internal Preconditioning Functions 21 /// ---------------------------------------------------------------------------- 22 /// @addtogroup CeedOperatorDeveloper 23 /// @{ 24 25 /** 26 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 27 CeedOperator functionality 28 29 @param op CeedOperator to create fallback for 30 31 @return An error code: 0 - success, otherwise - failure 32 33 @ref Developer 34 **/ 35 int CeedOperatorCreateFallback(CeedOperator op) { 36 int ierr; 37 38 // Fallback Ceed 39 const char *resource, *fallback_resource; 40 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 41 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 42 CeedChk(ierr); 43 if (!strcmp(resource, fallback_resource)) 44 // LCOV_EXCL_START 45 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 46 "Backend %s cannot create an operator" 47 "fallback to resource %s", resource, fallback_resource); 48 // LCOV_EXCL_STOP 49 50 // Fallback Ceed 51 Ceed ceed_ref; 52 if (!op->ceed->op_fallback_ceed) { 53 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 54 ceed_ref->op_fallback_parent = op->ceed; 55 ceed_ref->Error = op->ceed->Error; 56 op->ceed->op_fallback_ceed = ceed_ref; 57 } 58 ceed_ref = op->ceed->op_fallback_ceed; 59 60 // Clone Op 61 CeedOperator op_ref; 62 ierr = 
CeedCalloc(1, &op_ref); CeedChk(ierr); 63 memcpy(op_ref, op, sizeof(*op_ref)); 64 op_ref->data = NULL; 65 op_ref->is_interface_setup = false; 66 op_ref->is_backend_setup = false; 67 op_ref->ceed = ceed_ref; 68 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 69 ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 70 &op_ref->qf_assembled); CeedChk(ierr); 71 op->op_fallback = op_ref; 72 73 // Clone QF 74 CeedQFunction qf_ref; 75 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 76 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 77 qf_ref->data = NULL; 78 qf_ref->ceed = ceed_ref; 79 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 80 op_ref->qf = qf_ref; 81 op->qf_fallback = qf_ref; 82 return CEED_ERROR_SUCCESS; 83 } 84 85 /** 86 @brief Select correct basis matrix pointer based on CeedEvalMode 87 88 @param[in] eval_mode Current basis evaluation mode 89 @param[in] identity Pointer to identity matrix 90 @param[in] interp Pointer to interpolation matrix 91 @param[in] grad Pointer to gradient matrix 92 @param[out] basis_ptr Basis pointer to set 93 94 @ref Developer 95 **/ 96 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 97 const CeedScalar *identity, const CeedScalar *interp, 98 const CeedScalar *grad, const CeedScalar **basis_ptr) { 99 switch (eval_mode) { 100 case CEED_EVAL_NONE: 101 *basis_ptr = identity; 102 break; 103 case CEED_EVAL_INTERP: 104 *basis_ptr = interp; 105 break; 106 case CEED_EVAL_GRAD: 107 *basis_ptr = grad; 108 break; 109 case CEED_EVAL_WEIGHT: 110 case CEED_EVAL_DIV: 111 case CEED_EVAL_CURL: 112 break; // Caught by QF Assembly 113 } 114 } 115 116 /** 117 @brief Create point block restriction for active operator field 118 119 @param[in] rstr Original CeedElemRestriction for active field 120 @param[out] pointblock_rstr Address of the variable where the newly created 121 CeedElemRestriction will be stored 122 123 @return An error code: 0 - success, otherwise - failure 124 125 @ref Developer 126 **/ 127 static 
int CeedOperatorCreateActivePointBlockRestriction(
  CeedElemRestriction rstr,
  CeedElemRestriction *pointblock_rstr) {
  int ierr;
  Ceed ceed;
  ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
  const CeedInt *offsets;
  ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
  CeedChk(ierr);

  // Expand offsets - each original node offset is scaled so that it
  // addresses a num_comp x num_comp block of values instead of one value
  CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
          *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // Scale factor for offsets: num_comp when components are interleaved
  // (comp_stride == 1), num_comp^2 otherwise — TODO confirm against the
  // L-vector layouts used by CeedElemRestrictionCreate
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  // Track the largest scaled offset to size the new L-vector below
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction with num_comp^2 components per node and
  // component stride 1; ownership of pointblock_offsets transfers to it
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
  CeedChk(ierr);

  // Cleanup - return the borrowed offsets array
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction at quadrature points
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf,
         &rstr, request); CeedChk(ierr);
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm of the assembled QFunction data; used below to skip
  // contributions that are negligibly small relative to the data
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis; GRAD contributes dim eval modes
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // One entry per dimension for gradient evaluation
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis, same bookkeeping as inputs
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector (element-local)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array;
  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices; a num_qpts x num_nodes identity stands in for
  // CEED_EVAL_NONE so all modes use the same B^T D B form
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Values below this bound are treated as numerically zero and skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // d_out advances through the dim gradient blocks of grad_out
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal - full num_comp x num_comp block per node
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only - same component in and out (c_out used for both)
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                  num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);

  // Assemble local operator diagonal - scatter element diagonals into assembled
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup - diag_rstr is owned here only in the point block case
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling composite operator diagonal

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
  CeedOperator op, CeedRequest *request, const bool is_pointblock,
  CeedVector assembled) {
  int ierr;
  CeedInt num_sub;
  CeedOperator *suboperators;
  ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
  ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
  // Accumulate each suboperator's diagonal into the shared assembled vector
  for (CeedInt i = 0; i < num_sub; i++) {
    ierr =
CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 424 is_pointblock, assembled); CeedChk(ierr); 425 } 426 return CEED_ERROR_SUCCESS; 427 } 428 429 /** 430 @brief Build nonzero pattern for non-composite operator 431 432 Users should generally use CeedOperatorLinearAssembleSymbolic() 433 434 @param[in] op CeedOperator to assemble nonzero pattern 435 @param[in] offset Offset for number of entries 436 @param[out] rows Row number for each entry 437 @param[out] cols Column number for each entry 438 439 @return An error code: 0 - success, otherwise - failure 440 441 @ref Developer 442 **/ 443 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 444 CeedInt *rows, CeedInt *cols) { 445 int ierr; 446 Ceed ceed = op->ceed; 447 if (op->is_composite) 448 // LCOV_EXCL_START 449 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 450 "Composite operator not supported"); 451 // LCOV_EXCL_STOP 452 453 CeedSize num_nodes; 454 ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr); 455 CeedElemRestriction rstr_in; 456 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 457 CeedInt num_elem, elem_size, num_comp; 458 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 459 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 460 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 461 CeedInt layout_er[3]; 462 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 463 464 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 465 466 // Determine elem_dof relation 467 CeedVector index_vec; 468 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 469 CeedScalar *array; 470 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 471 for (CeedInt i = 0; i < num_nodes; ++i) { 472 array[i] = i; 473 } 474 ierr = CeedVectorRestoreArray(index_vec, &array); 
CeedChk(ierr); 475 CeedVector elem_dof; 476 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 477 CeedChk(ierr); 478 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 479 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 480 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 481 const CeedScalar *elem_dof_a; 482 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 483 CeedChk(ierr); 484 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 485 486 // Determine i, j locations for element matrices 487 CeedInt count = 0; 488 for (int e = 0; e < num_elem; ++e) { 489 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 490 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 491 for (int i = 0; i < elem_size; ++i) { 492 for (int j = 0; j < elem_size; ++j) { 493 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 494 (comp_out)*layout_er[1] + e*layout_er[2]; 495 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 496 (comp_in)*layout_er[1] + e*layout_er[2]; 497 498 const CeedInt row = elem_dof_a[elem_dof_index_row]; 499 const CeedInt col = elem_dof_a[elem_dof_index_col]; 500 501 rows[offset + count] = row; 502 cols[offset + count] = col; 503 count++; 504 } 505 } 506 } 507 } 508 } 509 if (count != local_num_entries) 510 // LCOV_EXCL_START 511 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 512 // LCOV_EXCL_STOP 513 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 514 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 515 516 return CEED_ERROR_SUCCESS; 517 } 518 519 /** 520 @brief Assemble nonzero entries for non-composite operator 521 522 Users should generally use CeedOperatorLinearAssemble() 523 524 @param[in] op CeedOperator to assemble 525 @param[out] offset Offest for number of entries 526 @param[out] values Values to assemble into matrix 527 528 @return An error code: 0 - success, otherwise - failure 529 530 @ref Developer 531 **/ 532 static 
int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 533 CeedVector values) { 534 int ierr; 535 Ceed ceed = op->ceed; 536 if (op->is_composite) 537 // LCOV_EXCL_START 538 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 539 "Composite operator not supported"); 540 // LCOV_EXCL_STOP 541 542 // Assemble QFunction 543 CeedQFunction qf; 544 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 545 CeedVector assembled_qf; 546 CeedElemRestriction rstr_q; 547 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 548 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 549 550 CeedSize qf_length; 551 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 552 553 CeedInt num_input_fields, num_output_fields; 554 CeedOperatorField *input_fields; 555 CeedOperatorField *output_fields; 556 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 557 &num_output_fields, &output_fields); CeedChk(ierr); 558 559 // Determine active input basis 560 CeedQFunctionField *qf_fields; 561 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 562 CeedInt num_eval_mode_in = 0, dim = 1; 563 CeedEvalMode *eval_mode_in = NULL; 564 CeedBasis basis_in = NULL; 565 CeedElemRestriction rstr_in = NULL; 566 for (CeedInt i=0; i<num_input_fields; i++) { 567 CeedVector vec; 568 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 569 if (vec == CEED_VECTOR_ACTIVE) { 570 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 571 CeedChk(ierr); 572 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 573 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 574 CeedChk(ierr); 575 CeedEvalMode eval_mode; 576 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 577 CeedChk(ierr); 578 switch (eval_mode) { 579 case CEED_EVAL_NONE: 580 case CEED_EVAL_INTERP: 581 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 582 eval_mode_in[num_eval_mode_in] = 
eval_mode; 583 num_eval_mode_in += 1; 584 break; 585 case CEED_EVAL_GRAD: 586 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 587 for (CeedInt d=0; d<dim; d++) { 588 eval_mode_in[num_eval_mode_in+d] = eval_mode; 589 } 590 num_eval_mode_in += dim; 591 break; 592 case CEED_EVAL_WEIGHT: 593 case CEED_EVAL_DIV: 594 case CEED_EVAL_CURL: 595 break; // Caught by QF Assembly 596 } 597 } 598 } 599 600 // Determine active output basis 601 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 602 CeedInt num_eval_mode_out = 0; 603 CeedEvalMode *eval_mode_out = NULL; 604 CeedBasis basis_out = NULL; 605 CeedElemRestriction rstr_out = NULL; 606 for (CeedInt i=0; i<num_output_fields; i++) { 607 CeedVector vec; 608 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 609 if (vec == CEED_VECTOR_ACTIVE) { 610 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 611 CeedChk(ierr); 612 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 613 CeedChk(ierr); 614 CeedEvalMode eval_mode; 615 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 616 CeedChk(ierr); 617 switch (eval_mode) { 618 case CEED_EVAL_NONE: 619 case CEED_EVAL_INTERP: 620 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 621 eval_mode_out[num_eval_mode_out] = eval_mode; 622 num_eval_mode_out += 1; 623 break; 624 case CEED_EVAL_GRAD: 625 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 626 for (CeedInt d=0; d<dim; d++) { 627 eval_mode_out[num_eval_mode_out+d] = eval_mode; 628 } 629 num_eval_mode_out += dim; 630 break; 631 case CEED_EVAL_WEIGHT: 632 case CEED_EVAL_DIV: 633 case CEED_EVAL_CURL: 634 break; // Caught by QF Assembly 635 } 636 } 637 } 638 639 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 640 // LCOV_EXCL_START 641 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 642 "Cannot assemble operator with out inputs/outputs"); 643 // LCOV_EXCL_STOP 
644 645 CeedInt num_elem, elem_size, num_qpts, num_comp; 646 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 647 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 648 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 649 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 650 651 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 652 653 // loop over elements and put in data structure 654 const CeedScalar *interp_in, *grad_in; 655 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 656 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 657 658 const CeedScalar *assembled_qf_array; 659 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 660 CeedChk(ierr); 661 662 CeedInt layout_qf[3]; 663 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 664 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 665 666 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 667 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 668 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 669 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 670 num_qpts]; // logically 3-tensor 671 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 672 CeedScalar elem_mat[elem_size * elem_size]; 673 int count = 0; 674 CeedScalar *vals; 675 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 676 for (int e = 0; e < num_elem; ++e) { 677 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 678 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 679 for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 680 B_mat_in[ell] = 0.0; 681 } 682 for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 683 B_mat_out[ell] = 0.0; 684 } 685 // Store block-diagonal D matrix as collection of 
small dense blocks 686 for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 687 D_mat[ell] = 0.0; 688 } 689 // form element matrix itself (for each block component) 690 for (int ell = 0; ell < elem_size*elem_size; ++ell) { 691 elem_mat[ell] = 0.0; 692 } 693 for (int q = 0; q < num_qpts; ++q) { 694 for (int n = 0; n < elem_size; ++n) { 695 CeedInt d_in = -1; 696 for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) { 697 const int qq = num_eval_mode_in*q; 698 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 699 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 700 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 701 d_in += 1; 702 B_mat_in[(qq+e_in)*elem_size + n] += 703 grad_in[(d_in*num_qpts+q) * elem_size + n]; 704 } else { 705 // LCOV_EXCL_START 706 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 707 // LCOV_EXCL_STOP 708 } 709 } 710 CeedInt d_out = -1; 711 for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) { 712 const int qq = num_eval_mode_out*q; 713 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 714 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 715 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 716 d_out += 1; 717 B_mat_out[(qq+e_out)*elem_size + n] += 718 grad_in[(d_out*num_qpts+q) * elem_size + n]; 719 } else { 720 // LCOV_EXCL_START 721 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 722 // LCOV_EXCL_STOP 723 } 724 } 725 } 726 for (int ei = 0; ei < num_eval_mode_out; ++ei) { 727 for (int ej = 0; ej < num_eval_mode_in; ++ej) { 728 const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 729 +comp_out; 730 const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 731 e*layout_qf[2]; 732 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 733 } 734 } 735 } 736 // Compute B^T*D 737 for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 738 BTD[ell] = 0.0; 739 } 740 for (int j = 
0; j<elem_size; ++j) { 741 for (int q = 0; q<num_qpts; ++q) { 742 int qq = num_eval_mode_out*q; 743 for (int ei = 0; ei < num_eval_mode_in; ++ei) { 744 for (int ej = 0; ej < num_eval_mode_out; ++ej) { 745 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 746 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 747 } 748 } 749 } 750 } 751 752 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 753 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 754 755 // put element matrix in coordinate data structure 756 for (int i = 0; i < elem_size; ++i) { 757 for (int j = 0; j < elem_size; ++j) { 758 vals[offset + count] = elem_mat[i*elem_size + j]; 759 count++; 760 } 761 } 762 } 763 } 764 } 765 if (count != local_num_entries) 766 // LCOV_EXCL_START 767 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 768 // LCOV_EXCL_STOP 769 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 770 771 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 772 CeedChk(ierr); 773 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 774 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 775 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 776 777 return CEED_ERROR_SUCCESS; 778 } 779 780 /** 781 @brief Count number of entries for assembled CeedOperator 782 783 @param[in] op CeedOperator to assemble 784 @param[out] num_entries Number of entries in assembled representation 785 786 @return An error code: 0 - success, otherwise - failure 787 788 @ref Utility 789 **/ 790 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 791 CeedInt *num_entries) { 792 int ierr; 793 CeedElemRestriction rstr; 794 CeedInt num_elem, elem_size, num_comp; 795 796 if (op->is_composite) 797 // LCOV_EXCL_START 798 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 799 "Composite operator not supported"); 800 // LCOV_EXCL_STOP 801 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 802 ierr = 
CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  // One dense (elem_size*num_comp)^2 block of entries per element
  *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Common code for creating a multigrid coarse operator and level
           transfer operators for a CeedOperator

  @param[in] op_fine        Fine grid operator
  @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse    Coarse grid restriction
  @param[in] basis_coarse   Coarse grid active vector basis
  @param[in] basis_c_to_f   Basis for coarse to fine interpolation
  @param[out] op_coarse     Coarse grid operator
  @param[out] op_prolong    Coarse to fine operator
  @param[out] op_restrict   Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid - same QFunctions as the fine operator
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if
(op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 851 rstr_fine = op_fine->input_fields[i]->elem_restr; 852 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 853 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 854 CeedChk(ierr); 855 } else { 856 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 857 op_fine->input_fields[i]->elem_restr, 858 op_fine->input_fields[i]->basis, 859 op_fine->input_fields[i]->vec); CeedChk(ierr); 860 } 861 } 862 // -- Clone output fields 863 for (int i = 0; i < op_fine->qf->num_output_fields; i++) { 864 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 865 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 866 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 867 CeedChk(ierr); 868 } else { 869 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 870 op_fine->output_fields[i]->elem_restr, 871 op_fine->output_fields[i]->basis, 872 op_fine->output_fields[i]->vec); CeedChk(ierr); 873 } 874 } 875 // -- Clone QFunctionAssemblyData 876 ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, 877 &(*op_coarse)->qf_assembled); CeedChk(ierr); 878 879 // Multiplicity vector 880 CeedVector mult_vec, mult_e_vec; 881 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 882 CeedChk(ierr); 883 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 884 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 885 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 886 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 887 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 888 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 889 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 890 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 891 892 // Restriction 893 CeedInt num_comp; 894 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); 
CeedChk(ierr); 895 CeedQFunction qf_restrict; 896 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 897 CeedChk(ierr); 898 CeedInt *num_comp_r_data; 899 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 900 num_comp_r_data[0] = num_comp; 901 CeedQFunctionContext ctx_r; 902 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 903 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 904 sizeof(*num_comp_r_data), num_comp_r_data); 905 CeedChk(ierr); 906 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 907 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 908 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 909 CeedChk(ierr); 910 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 911 CeedChk(ierr); 912 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 913 CEED_EVAL_INTERP); CeedChk(ierr); 914 ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr); 915 916 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 917 CEED_QFUNCTION_NONE, op_restrict); 918 CeedChk(ierr); 919 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 920 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 921 CeedChk(ierr); 922 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 923 CEED_BASIS_COLLOCATED, mult_vec); 924 CeedChk(ierr); 925 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 926 CEED_VECTOR_ACTIVE); CeedChk(ierr); 927 928 // Prolongation 929 CeedQFunction qf_prolong; 930 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 931 CeedChk(ierr); 932 CeedInt *num_comp_p_data; 933 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 934 num_comp_p_data[0] = num_comp; 935 CeedQFunctionContext ctx_p; 936 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 937 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 938 
sizeof(*num_comp_p_data), num_comp_p_data); 939 CeedChk(ierr); 940 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 941 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 942 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 943 CeedChk(ierr); 944 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 945 CeedChk(ierr); 946 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 947 CeedChk(ierr); 948 ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr); 949 950 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 951 CEED_QFUNCTION_NONE, op_prolong); 952 CeedChk(ierr); 953 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 954 CEED_VECTOR_ACTIVE); CeedChk(ierr); 955 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 956 CEED_BASIS_COLLOCATED, mult_vec); 957 CeedChk(ierr); 958 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 959 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 960 CeedChk(ierr); 961 962 // Cleanup 963 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 964 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 965 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 966 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 967 return CEED_ERROR_SUCCESS; 968 } 969 970 /** 971 @brief Build 1D mass matrix and Laplacian with perturbation 972 973 @param[in] interp_1d Interpolation matrix in one dimension 974 @param[in] grad_1d Gradient matrix in one dimension 975 @param[in] q_weight_1d Quadrature weights in one dimension 976 @param[in] P_1d Number of basis nodes in one dimension 977 @param[in] Q_1d Number of quadrature points in one dimension 978 @param[in] dim Dimension of basis 979 @param[out] mass Assembled mass matrix in one dimension 980 @param[out] laplace Assembled perturbed Laplacian in one dimension 981 982 @return An error code: 0 - success, otherwise - 
failure

  @ref Developer
**/
CeedPragmaOptimizeOff
static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
                                const CeedScalar *grad_1d,
                                const CeedScalar *q_weight_1d, CeedInt P_1d,
                                CeedInt Q_1d, CeedInt dim,
                                CeedScalar *mass, CeedScalar *laplace) {

  // -- Mass: mass[i,j] = sum_k B[k,i] W[k] B[k,j]  (column-major, P_1d x P_1d)
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
      mass[i+j*P_1d] = sum;
    }
  // -- Laplacian: laplace[i,j] = sum_k D[k,i] W[k] D[k,j]
  for (CeedInt i=0; i<P_1d; i++)
    for (CeedInt j=0; j<P_1d; j++) {
      CeedScalar sum = 0.0;
      for (CeedInt k=0; k<Q_1d; k++)
        sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
      laplace[i+j*P_1d] = sum;
    }
  // Perturb the diagonal to keep the 1D Laplacian invertible (it is singular
  // for pure Neumann); smaller perturbation in higher dimension
  CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Backend API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorBackend
/// @{

/**
  @brief Create object holding CeedQFunction assembly data for CeedOperator

  @param[in] ceed   A Ceed object where the CeedQFunctionAssemblyData will be created
  @param[out] data  Address of the variable where the newly created
                      CeedQFunctionAssemblyData will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataCreate(Ceed ceed,
                                    CeedQFunctionAssemblyData *data) {
  int ierr;

  ierr = CeedCalloc(1, data); CeedChk(ierr);
  // New data object starts with a single reference held by the caller
  (*data)->ref_count = 1;
  (*data)->ceed = ceed;
  ierr = CeedReference(ceed); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Increment
the reference counter for a CeedQFunctionAssemblyData

  @param data  CeedQFunctionAssemblyData to increment the reference counter

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
  data->ref_count++;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set re-use of CeedQFunctionAssemblyData

  @param data        CeedQFunctionAssemblyData to mark for reuse
  @param reuse_data  Boolean flag indicating data re-use

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data,
                                      bool reuse_data) {
  data->reuse_data = reuse_data;
  // Any change to the reuse policy forces a re-assembly on the next query
  data->needs_data_update = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Mark QFunctionAssemblyData as stale

  @param data               CeedQFunctionAssemblyData to mark as stale
  @param needs_data_update  Boolean flag indicating if update is needed or completed

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,
    bool needs_data_update) {
  data->needs_data_update = needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Determine if QFunctionAssemblyData needs update

  @param[in] data              CeedQFunctionAssemblyData to check
  @param[out] is_update_needed Boolean flag indicating if re-assembly is required

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
    bool *is_update_needed) {
  // Data not marked for reuse is always considered stale
  *is_update_needed = !data->reuse_data || data->needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Copy the pointer to a
CeedQFunctionAssemblyData. Both pointers should
           be destroyed with `CeedQFunctionAssemblyDataDestroy()`;
           Note: If `*data_copy` is non-NULL, then it is assumed that
           `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
           CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
           the only reference to this CeedQFunctionAssemblyData.

  @param data            CeedQFunctionAssemblyData to copy reference to
  @param[out] data_copy  Variable to store copied reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
    CeedQFunctionAssemblyData *data_copy) {
  int ierr;

  // Increment before destroy so copying a reference to itself is safe
  ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
  ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
  *data_copy = data;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get setup status for internal objects for CeedQFunctionAssemblyData

  @param[in] data       CeedQFunctionAssemblyData to retrieve status
  @param[out] is_setup  Boolean flag for setup status

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
                                     bool *is_setup) {
  *is_setup = data->is_setup;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set internal objects for CeedQFunctionAssemblyData

  @param[in] data  CeedQFunctionAssemblyData to set objects
  @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
  @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector vec, CeedElemRestriction rstr) {
  int ierr;

  // Hold references to the assembled vector/restriction inside the data object
  ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);

  data->is_setup = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get internal objects for CeedQFunctionAssemblyData; copied references
           are returned, which the caller is responsible for destroying

  @param[in] data   CeedQFunctionAssemblyData to retrieve objects from
  @param[out] vec   Variable to store CeedVector of assembled CeedQFunction
  @param[out] rstr  Variable to store CeedElemRestriction for assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector *vec, CeedElemRestriction *rstr) {
  int ierr;

  if (!data->is_setup)
    // LCOV_EXCL_START
    return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
                     "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
  // LCOV_EXCL_STOP

  ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Destroy CeedQFunctionAssemblyData

  @param[out] data  CeedQFunctionAssemblyData to destroy

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
  int ierr;

  // Only free once the last reference is released
  if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;

  ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
  ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);

  ierr = CeedFree(data); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
The vector 'assembled' is of shape
    [num_elements, num_input_fields, num_output_fields, num_quad_points]
  and contains column-major matrices representing the action of the
  CeedQFunction for a corresponding quadrature point on an element. Inputs and
  outputs are in the order provided by the user when adding CeedOperator fields.
  For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
  'v', provided in that order, would result in an assembled QFunction that
  consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
  on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedQFunction at
                           quadrature points
  @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
                           CeedQFunction
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
                                        CeedElemRestriction *rstr,
                                        CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version, if the backend implements assembly directly
  if (op->LinearAssembleQFunction) {
    ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
    CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble via the cloned fallback operator
    ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
           rstr, request); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble CeedQFunction and store
result internally. Return copied
           references of stored data to the caller. Caller is responsible for
           ownership and destruction of the copied references. See also
           @ref CeedOperatorLinearAssembleQFunction

  @param op         CeedOperator to assemble CeedQFunction
  @param assembled  CeedVector to store assembled CeedQFunction at
                      quadrature points
  @param rstr       CeedElemRestriction for CeedVector containing assembled
                      CeedQFunction
  @param request    Address of CeedRequest for non-blocking completion, else
                      @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    bool qf_assembled_is_setup;
    CeedVector assembled_vec = NULL;
    CeedElemRestriction assembled_rstr = NULL;

    ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
                                            &qf_assembled_is_setup); CeedChk(ierr);
    if (qf_assembled_is_setup) {
      // Objects already built; only re-run assembly if marked stale
      ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
             &assembled_rstr); CeedChk(ierr);

      bool update_needed;
      ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
             &update_needed); CeedChk(ierr);
      if (update_needed) {
        ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
               request); CeedChk(ierr);
      }
    } else {
      // First assembly builds and caches the internal objects
      // NOTE(review): assumes a backend providing LinearAssembleQFunctionUpdate
      // also provides LinearAssembleQFunction — TODO confirm
      ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
             request); CeedChk(ierr);
      ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
             assembled_rstr); CeedChk(ierr);
    }
    ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
    CeedChk(ierr);

    // Copy reference to internally held copy; caller owns the returned refs
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
    ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
    ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble via the cloned fallback operator
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // A diagonal is only defined for a square operator
  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Backend only has the additive variant; zero first to get "overwrite"
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0);
  CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
    CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference
Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation; composite operators sum contributions
  // from each sub-operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Backend only has the additive variant; zero first to get "overwrite"
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr =
CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // A diagonal is only defined for a square operator
  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation; true -> point block (per-node blocks)
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Fully assemble the nonzero pattern of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssemble()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the number of entries and their (i, j) locations,
    while CeedOperatorLinearAssemble() provides the values in the same
    ordering.

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
  @param[in]  op           CeedOperator to assemble
  @param[out] num_entries  Number of entries in coordinate nonzero pattern
  @param[out] rows         Row number for each entry
  @param[out] cols         Column number for each entry

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  // Two passes: first count total entries to size the output arrays, then
  // fill in the (row, col) pairs sub-operator by sub-operator.

  // count entries and allocate rows, cols arrays
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  // Caller owns rows/cols; (i, j) pairs may repeat across sub-operators
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      // Advance offset by this sub-operator's entry count so the next one
      // writes into the following slice of rows/cols
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
  @param[in]  op     CeedOperator to assemble
  @param[out] values Values to assemble into matrix

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  // Values are written in the same COO ordering produced by
  // CeedOperatorLinearAssembleSymbolic(): sub-operators in list order,
  // each writing single_entries values starting at offset.
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level
transfer operators 1795 for a CeedOperator, creating the prolongation basis from the 1796 fine and coarse grid interpolation 1797 1798 Note: Calling this function asserts that setup is complete 1799 and sets the CeedOperator as immutable. 1800 1801 @param[in] op_fine Fine grid operator 1802 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1803 @param[in] rstr_coarse Coarse grid restriction 1804 @param[in] basis_coarse Coarse grid active vector basis 1805 @param[out] op_coarse Coarse grid operator 1806 @param[out] op_prolong Coarse to fine operator 1807 @param[out] op_restrict Fine to coarse operator 1808 1809 @return An error code: 0 - success, otherwise - failure 1810 1811 @ref User 1812 **/ 1813 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, 1814 CeedVector p_mult_fine, 1815 CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1816 CeedOperator *op_coarse, CeedOperator *op_prolong, 1817 CeedOperator *op_restrict) { 1818 int ierr; 1819 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1820 Ceed ceed; 1821 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1822 1823 // Check for compatible quadrature spaces 1824 CeedBasis basis_fine; 1825 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1826 CeedInt Q_f, Q_c; 1827 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1828 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1829 if (Q_f != Q_c) 1830 // LCOV_EXCL_START 1831 return CeedError(ceed, CEED_ERROR_DIMENSION, 1832 "Bases must have compatible quadrature spaces"); 1833 // LCOV_EXCL_STOP 1834 1835 // Coarse to fine basis 1836 CeedInt P_f, P_c, Q = Q_f; 1837 bool is_tensor_f, is_tensor_c; 1838 ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); 1839 ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); 1840 CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; 1841 if (is_tensor_f && is_tensor_c) { 1842 ierr = 
CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); 1843 ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); 1844 ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); 1845 } else if (!is_tensor_f && !is_tensor_c) { 1846 ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); 1847 ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); 1848 } else { 1849 // LCOV_EXCL_START 1850 return CeedError(ceed, CEED_ERROR_MINOR, 1851 "Bases must both be tensor or non-tensor"); 1852 // LCOV_EXCL_STOP 1853 } 1854 1855 ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); 1856 ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); 1857 ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); 1858 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 1859 if (is_tensor_f) { 1860 memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]); 1861 memcpy(interp_c, basis_coarse->interp_1d, 1862 Q*P_c*sizeof basis_coarse->interp_1d[0]); 1863 } else { 1864 memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]); 1865 memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]); 1866 } 1867 1868 // -- QR Factorization, interp_f = Q R 1869 ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); 1870 1871 // -- Apply Qtranspose, interp_c = Qtranspose interp_c 1872 ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, 1873 Q, P_c, P_f, P_c, 1); CeedChk(ierr); 1874 1875 // -- Apply Rinv, interp_c_to_f = Rinv interp_c 1876 for (CeedInt j=0; j<P_c; j++) { // Column j 1877 interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1]; 1878 for (CeedInt i=P_f-2; i>=0; i--) { // Row i 1879 interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; 1880 for (CeedInt k=i+1; k<P_f; k++) 1881 interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k]; 1882 interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i]; 1883 } 1884 } 1885 ierr = CeedFree(&tau); CeedChk(ierr); 1886 ierr = 
CeedFree(&interp_c); CeedChk(ierr); 1887 ierr = CeedFree(&interp_f); CeedChk(ierr); 1888 1889 // Complete with interp_c_to_f versions of code 1890 if (is_tensor_f) { 1891 ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine, 1892 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1893 CeedChk(ierr); 1894 } else { 1895 ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine, 1896 rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict); 1897 CeedChk(ierr); 1898 } 1899 1900 // Cleanup 1901 ierr = CeedFree(&interp_c_to_f); CeedChk(ierr); 1902 return CEED_ERROR_SUCCESS; 1903 } 1904 1905 /** 1906 @brief Create a multigrid coarse operator and level transfer operators 1907 for a CeedOperator with a tensor basis for the active basis 1908 1909 Note: Calling this function asserts that setup is complete 1910 and sets the CeedOperator as immutable. 1911 1912 @param[in] op_fine Fine grid operator 1913 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 1914 @param[in] rstr_coarse Coarse grid restriction 1915 @param[in] basis_coarse Coarse grid active vector basis 1916 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 1917 @param[out] op_coarse Coarse grid operator 1918 @param[out] op_prolong Coarse to fine operator 1919 @param[out] op_restrict Fine to coarse operator 1920 1921 @return An error code: 0 - success, otherwise - failure 1922 1923 @ref User 1924 **/ 1925 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, 1926 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 1927 const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, 1928 CeedOperator *op_prolong, CeedOperator *op_restrict) { 1929 int ierr; 1930 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 1931 Ceed ceed; 1932 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 1933 1934 // Check for compatible quadrature spaces 1935 CeedBasis basis_fine; 1936 
ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 1937 CeedInt Q_f, Q_c; 1938 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 1939 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 1940 if (Q_f != Q_c) 1941 // LCOV_EXCL_START 1942 return CeedError(ceed, CEED_ERROR_DIMENSION, 1943 "Bases must have compatible quadrature spaces"); 1944 // LCOV_EXCL_STOP 1945 1946 // Coarse to fine basis 1947 CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c; 1948 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 1949 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 1950 ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr); 1951 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 1952 CeedChk(ierr); 1953 P_1d_c = dim == 1 ? num_nodes_c : 1954 dim == 2 ? sqrt(num_nodes_c) : 1955 cbrt(num_nodes_c); 1956 CeedScalar *q_ref, *q_weight, *grad; 1957 ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr); 1958 ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr); 1959 ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr); 1960 CeedBasis basis_c_to_f; 1961 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, 1962 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 1963 CeedChk(ierr); 1964 ierr = CeedFree(&q_ref); CeedChk(ierr); 1965 ierr = CeedFree(&q_weight); CeedChk(ierr); 1966 ierr = CeedFree(&grad); CeedChk(ierr); 1967 1968 // Core code 1969 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 1970 basis_coarse, basis_c_to_f, op_coarse, 1971 op_prolong, op_restrict); 1972 CeedChk(ierr); 1973 return CEED_ERROR_SUCCESS; 1974 } 1975 1976 /** 1977 @brief Create a multigrid coarse operator and level transfer operators 1978 for a CeedOperator with a non-tensor basis for the active vector 1979 1980 Note: Calling this function asserts that setup is complete 1981 and sets the CeedOperator as immutable. 
  @param[in] op_fine        Fine grid operator
  @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse    Coarse grid restriction
  @param[in] basis_coarse   Coarse grid active vector basis
  @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
  @param[out] op_coarse     Coarse grid operator
  @param[out] op_prolong    Coarse to fine operator
  @param[out] op_restrict   Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
                                       CeedVector p_mult_fine,
                                       CeedElemRestriction rstr_coarse,
                                       CeedBasis basis_coarse,
                                       const CeedScalar *interp_c_to_f,
                                       CeedOperator *op_coarse,
                                       CeedOperator *op_prolong,
                                       CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // Only interp_c_to_f carries data; grad, q_ref, and q_weight are
  // zero-filled placeholders for the transfer basis
  CeedElemTopology topo;
  ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
  CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
                           interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build a FDM based approximate inverse for each element for a
           CeedOperator

  This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
    Method based approximate inverse. This function obtains the simultaneous
    diagonalization for the 1D mass and Laplacian operators,
    M = V^T V, K = V^T S V.
    The assembled QFunction is used to modify the eigenvalues from simultaneous
    diagonalization and obtain an approximate inverse of the form
    V^T S^hat V. The CeedOperator must be linear and non-composite. The
    associated CeedQFunction must therefore also be linear.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
2065 2066 @param op CeedOperator to create element inverses 2067 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2068 for each element 2069 @param request Address of CeedRequest for non-blocking completion, else 2070 @ref CEED_REQUEST_IMMEDIATE 2071 2072 @return An error code: 0 - success, otherwise - failure 2073 2074 @ref User 2075 **/ 2076 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2077 CeedRequest *request) { 2078 int ierr; 2079 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2080 2081 // Use backend version, if available 2082 if (op->CreateFDMElementInverse) { 2083 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2084 return CEED_ERROR_SUCCESS; 2085 } else { 2086 // Check for valid fallback resource 2087 const char *resource, *fallback_resource; 2088 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2089 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2090 CeedChk(ierr); 2091 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2092 // Fallback to reference Ceed 2093 if (!op->op_fallback) { 2094 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2095 } 2096 // Assemble 2097 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2098 CeedChk(ierr); 2099 return CEED_ERROR_SUCCESS; 2100 } 2101 } 2102 2103 // Interface implementation 2104 Ceed ceed, ceed_parent; 2105 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2106 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2107 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2108 CeedQFunction qf; 2109 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2110 2111 // Determine active input basis 2112 bool interp = false, grad = false; 2113 CeedBasis basis = NULL; 2114 CeedElemRestriction rstr = NULL; 2115 CeedOperatorField *op_fields; 2116 CeedQFunctionField *qf_fields; 2117 CeedInt num_input_fields; 2118 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2119 CeedChk(ierr); 2120 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2121 for (CeedInt i=0; i<num_input_fields; i++) { 2122 CeedVector vec; 2123 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2124 if (vec == CEED_VECTOR_ACTIVE) { 2125 CeedEvalMode eval_mode; 2126 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2127 interp = interp || eval_mode == CEED_EVAL_INTERP; 2128 grad = grad || eval_mode == CEED_EVAL_GRAD; 2129 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2130 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2131 } 2132 } 2133 if (!basis) 2134 // LCOV_EXCL_START 2135 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2136 // LCOV_EXCL_STOP 2137 CeedSize l_size = 1; 2138 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2139 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2140 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2141 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2142 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2143 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2144 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2145 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2146 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2147 2148 // Build and diagonalize 1D Mass and Laplacian 2149 
bool tensor_basis; 2150 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2151 if (!tensor_basis) 2152 // LCOV_EXCL_START 2153 return CeedError(ceed, CEED_ERROR_BACKEND, 2154 "FDMElementInverse only supported for tensor " 2155 "bases"); 2156 // LCOV_EXCL_STOP 2157 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2158 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2159 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2160 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2161 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2162 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2163 // -- Build matrices 2164 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2165 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2166 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2167 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2168 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2169 mass, laplace); CeedChk(ierr); 2170 2171 // -- Diagonalize 2172 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2173 CeedChk(ierr); 2174 ierr = CeedFree(&mass); CeedChk(ierr); 2175 ierr = CeedFree(&laplace); CeedChk(ierr); 2176 for (CeedInt i=0; i<P_1d; i++) 2177 for (CeedInt j=0; j<P_1d; j++) 2178 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2179 ierr = CeedFree(&x); CeedChk(ierr); 2180 2181 // Assemble QFunction 2182 CeedVector assembled; 2183 CeedElemRestriction rstr_qf; 2184 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2185 &rstr_qf, request); CeedChk(ierr); 2186 CeedInt layout[3]; 2187 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2188 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2189 CeedScalar max_norm = 0; 2190 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2191 2192 // Calculate element averages 2193 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2194 CeedScalar 
*elem_avg; 2195 const CeedScalar *assembled_array, *q_weight_array; 2196 CeedVector q_weight; 2197 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2198 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2199 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2200 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2201 CeedChk(ierr); 2202 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2203 CeedChk(ierr); 2204 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2205 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2206 for (CeedInt e=0; e<num_elem; e++) { 2207 CeedInt count = 0; 2208 for (CeedInt q=0; q<num_qpts; q++) 2209 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2210 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2211 qf_value_bound) { 2212 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2213 q_weight_array[q]; 2214 count++; 2215 } 2216 if (count) { 2217 elem_avg[e] /= count; 2218 } else { 2219 elem_avg[e] = 1.0; 2220 } 2221 } 2222 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2223 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2224 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2225 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2226 2227 // Build FDM diagonal 2228 CeedVector q_data; 2229 CeedScalar *q_data_array, *fdm_diagonal; 2230 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2231 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2232 for (CeedInt c=0; c<num_comp; c++) 2233 for (CeedInt n=0; n<elem_size; n++) { 2234 if (interp) 2235 fdm_diagonal[c*elem_size + n] = 1.0; 2236 if (grad) 2237 for (CeedInt d=0; d<dim; d++) { 2238 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2239 fdm_diagonal[c*elem_size + n] += lambda[i]; 2240 } 2241 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2242 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2243 } 2244 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2245 CeedChk(ierr); 2246 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2247 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2248 CeedChk(ierr); 2249 for (CeedInt e=0; e<num_elem; e++) 2250 for (CeedInt c=0; c<num_comp; c++) 2251 for (CeedInt n=0; n<elem_size; n++) 2252 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2253 fdm_diagonal[c*elem_size + n]); 2254 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2255 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2256 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2257 2258 // Setup FDM operator 2259 // -- Basis 2260 CeedBasis fdm_basis; 2261 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2262 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2263 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2264 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2265 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2266 fdm_interp, grad_dummy, q_ref_dummy, 2267 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2268 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2269 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2270 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2271 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2272 ierr = CeedFree(&lambda); CeedChk(ierr); 2273 2274 // -- Restriction 2275 CeedElemRestriction rstr_qd_i; 2276 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2277 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2278 num_comp, num_elem*num_comp*elem_size, 2279 strides, &rstr_qd_i); CeedChk(ierr); 2280 // -- QFunction 2281 CeedQFunction qf_fdm; 2282 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2283 CeedChk(ierr); 2284 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2285 
CeedChk(ierr); 2286 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2287 CeedChk(ierr); 2288 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2289 CeedChk(ierr); 2290 ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr); 2291 // -- QFunction context 2292 CeedInt *num_comp_data; 2293 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2294 num_comp_data[0] = num_comp; 2295 CeedQFunctionContext ctx_fdm; 2296 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2297 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2298 sizeof(*num_comp_data), num_comp_data); 2299 CeedChk(ierr); 2300 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2301 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2302 // -- Operator 2303 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2304 CeedChk(ierr); 2305 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2306 CeedChk(ierr); 2307 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2308 q_data); CeedChk(ierr); 2309 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2310 CeedChk(ierr); 2311 2312 // Cleanup 2313 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2314 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2315 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2316 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2317 2318 return CEED_ERROR_SUCCESS; 2319 } 2320 2321 /// @} 2322