1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedOperator preconditioning interfaces 18 19 /// ---------------------------------------------------------------------------- 20 /// CeedOperator Library Internal Preconditioning Functions 21 /// ---------------------------------------------------------------------------- 22 /// @addtogroup CeedOperatorDeveloper 23 /// @{ 24 25 /** 26 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 27 CeedOperator functionality 28 29 @param op CeedOperator to create fallback for 30 31 @return An error code: 0 - success, otherwise - failure 32 33 @ref Developer 34 **/ 35 int CeedOperatorCreateFallback(CeedOperator op) { 36 int ierr; 37 Ceed fallback_ceed; 38 39 // Check not already created 40 if (op->op_fallback) return CEED_ERROR_SUCCESS; 41 42 // Fallback Ceed 43 ierr = CeedGetOperatorFallbackCeed(op->ceed, &fallback_ceed); CeedChk(ierr); 44 45 // Clone Op 46 CeedOperator op_fallback; 47 if (op->is_composite) { 48 ierr = CeedCompositeOperatorCreate(fallback_ceed, &op_fallback); 49 CeedChk(ierr); 50 for (CeedInt i = 0; i < op->num_suboperators; i++) { 51 ierr = CeedCompositeOperatorAddSub(op_fallback, op->sub_operators[i]); 52 CeedChk(ierr); 53 } 54 } else { 55 ierr = CeedOperatorCreate(fallback_ceed, op->qf, op->dqf, op->dqfT, 56 &op_fallback); CeedChk(ierr); 57 for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { 58 ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, 59 op->input_fields[i]->elem_restr, 60 
op->input_fields[i]->basis, 61 op->input_fields[i]->vec); CeedChk(ierr); 62 } 63 for (CeedInt i = 0; i < op->qf->num_output_fields; i++) { 64 ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, 65 op->output_fields[i]->elem_restr, 66 op->output_fields[i]->basis, 67 op->output_fields[i]->vec); CeedChk(ierr); 68 } 69 ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 70 &op_fallback->qf_assembled); CeedChk(ierr); 71 if (op_fallback->num_qpts == 0) { 72 ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts); 73 CeedChk(ierr); 74 } 75 } 76 ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr); 77 ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr); 78 op->op_fallback = op_fallback; 79 80 return CEED_ERROR_SUCCESS; 81 } 82 83 /** 84 @brief Select correct basis matrix pointer based on CeedEvalMode 85 86 @param[in] eval_mode Current basis evaluation mode 87 @param[in] identity Pointer to identity matrix 88 @param[in] interp Pointer to interpolation matrix 89 @param[in] grad Pointer to gradient matrix 90 @param[out] basis_ptr Basis pointer to set 91 92 @ref Developer 93 **/ 94 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 95 const CeedScalar *identity, const CeedScalar *interp, 96 const CeedScalar *grad, const CeedScalar **basis_ptr) { 97 switch (eval_mode) { 98 case CEED_EVAL_NONE: 99 *basis_ptr = identity; 100 break; 101 case CEED_EVAL_INTERP: 102 *basis_ptr = interp; 103 break; 104 case CEED_EVAL_GRAD: 105 *basis_ptr = grad; 106 break; 107 case CEED_EVAL_WEIGHT: 108 case CEED_EVAL_DIV: 109 case CEED_EVAL_CURL: 110 break; // Caught by QF Assembly 111 } 112 } 113 114 /** 115 @brief Create point block restriction for active operator field 116 117 @param[in] rstr Original CeedElemRestriction for active field 118 @param[out] pointblock_rstr Address of the variable where the newly created 119 CeedElemRestriction will be stored 120 121 @return An error code: 0 - success, otherwise - 
failure 122 123 @ref Developer 124 **/ 125 static int CeedOperatorCreateActivePointBlockRestriction( 126 CeedElemRestriction rstr, 127 CeedElemRestriction *pointblock_rstr) { 128 int ierr; 129 Ceed ceed; 130 ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 131 const CeedInt *offsets; 132 ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets); 133 CeedChk(ierr); 134 135 // Expand offsets 136 CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, 137 *pointblock_offsets; 138 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 139 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 140 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 141 ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); 142 CeedInt shift = num_comp; 143 if (comp_stride != 1) 144 shift *= num_comp; 145 ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets); 146 CeedChk(ierr); 147 for (CeedInt i = 0; i < num_elem*elem_size; i++) { 148 pointblock_offsets[i] = offsets[i]*shift; 149 if (pointblock_offsets[i] > max) 150 max = pointblock_offsets[i]; 151 } 152 153 // Create new restriction 154 ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, 155 1, max + num_comp*num_comp, CEED_MEM_HOST, 156 CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); 157 CeedChk(ierr); 158 159 // Cleanup 160 ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr); 161 162 return CEED_ERROR_SUCCESS; 163 } 164 165 /** 166 @brief Core logic for assembling operator diagonal or point block diagonal 167 168 @param[in] op CeedOperator to assemble point block diagonal 169 @param[in] request Address of CeedRequest for non-blocking completion, else 170 CEED_REQUEST_IMMEDIATE 171 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 172 @param[out] assembled CeedVector to store assembled diagonal 173 174 @return An error code: 0 - 
success, otherwise - failure 175 176 @ref Developer 177 **/ 178 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 179 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 180 int ierr; 181 Ceed ceed; 182 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 183 184 // Assemble QFunction 185 CeedQFunction qf; 186 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 187 CeedInt num_input_fields, num_output_fields; 188 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 189 CeedChk(ierr); 190 CeedVector assembled_qf; 191 CeedElemRestriction rstr; 192 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 193 &rstr, request); CeedChk(ierr); 194 CeedInt layout[3]; 195 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 196 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 197 CeedScalar max_norm = 0; 198 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 199 200 // Determine active input basis 201 CeedOperatorField *op_fields; 202 CeedQFunctionField *qf_fields; 203 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 204 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 205 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 206 CeedEvalMode *eval_mode_in = NULL; 207 CeedBasis basis_in = NULL; 208 CeedElemRestriction rstr_in = NULL; 209 for (CeedInt i=0; i<num_input_fields; i++) { 210 CeedVector vec; 211 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 212 if (vec == CEED_VECTOR_ACTIVE) { 213 CeedElemRestriction rstr; 214 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 215 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 216 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 217 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 218 if (rstr_in && rstr_in != rstr) 219 // 
LCOV_EXCL_START 220 return CeedError(ceed, CEED_ERROR_BACKEND, 221 "Multi-field non-composite operator diagonal assembly not supported"); 222 // LCOV_EXCL_STOP 223 rstr_in = rstr; 224 CeedEvalMode eval_mode; 225 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 226 switch (eval_mode) { 227 case CEED_EVAL_NONE: 228 case CEED_EVAL_INTERP: 229 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 230 eval_mode_in[num_eval_mode_in] = eval_mode; 231 num_eval_mode_in += 1; 232 break; 233 case CEED_EVAL_GRAD: 234 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 235 for (CeedInt d=0; d<dim; d++) 236 eval_mode_in[num_eval_mode_in+d] = eval_mode; 237 num_eval_mode_in += dim; 238 break; 239 case CEED_EVAL_WEIGHT: 240 case CEED_EVAL_DIV: 241 case CEED_EVAL_CURL: 242 break; // Caught by QF Assembly 243 } 244 } 245 } 246 247 // Determine active output basis 248 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 249 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 250 CeedInt num_eval_mode_out = 0; 251 CeedEvalMode *eval_mode_out = NULL; 252 CeedBasis basis_out = NULL; 253 CeedElemRestriction rstr_out = NULL; 254 for (CeedInt i=0; i<num_output_fields; i++) { 255 CeedVector vec; 256 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 257 if (vec == CEED_VECTOR_ACTIVE) { 258 CeedElemRestriction rstr; 259 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 260 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 261 if (rstr_out && rstr_out != rstr) 262 // LCOV_EXCL_START 263 return CeedError(ceed, CEED_ERROR_BACKEND, 264 "Multi-field non-composite operator diagonal assembly not supported"); 265 // LCOV_EXCL_STOP 266 rstr_out = rstr; 267 CeedEvalMode eval_mode; 268 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 269 switch (eval_mode) { 270 case 
CEED_EVAL_NONE: 271 case CEED_EVAL_INTERP: 272 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 273 eval_mode_out[num_eval_mode_out] = eval_mode; 274 num_eval_mode_out += 1; 275 break; 276 case CEED_EVAL_GRAD: 277 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 278 for (CeedInt d=0; d<dim; d++) 279 eval_mode_out[num_eval_mode_out+d] = eval_mode; 280 num_eval_mode_out += dim; 281 break; 282 case CEED_EVAL_WEIGHT: 283 case CEED_EVAL_DIV: 284 case CEED_EVAL_CURL: 285 break; // Caught by QF Assembly 286 } 287 } 288 } 289 290 // Assemble point block diagonal restriction, if needed 291 CeedElemRestriction diag_rstr = rstr_out; 292 if (is_pointblock) { 293 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 294 CeedChk(ierr); 295 } 296 297 // Create diagonal vector 298 CeedVector elem_diag; 299 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 300 CeedChk(ierr); 301 302 // Assemble element operator diagonals 303 CeedScalar *elem_diag_array; 304 const CeedScalar *assembled_qf_array; 305 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 306 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 307 CeedChk(ierr); 308 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 309 CeedChk(ierr); 310 CeedInt num_elem, num_nodes, num_qpts; 311 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 312 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 313 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 314 // Basis matrices 315 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 316 CeedScalar *identity = NULL; 317 bool evalNone = false; 318 for (CeedInt i=0; i<num_eval_mode_in; i++) 319 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 320 for (CeedInt i=0; i<num_eval_mode_out; i++) 321 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 322 
if (evalNone) { 323 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 324 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 325 identity[i*num_nodes+i] = 1.0; 326 } 327 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 328 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 329 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 330 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 331 // Compute the diagonal of B^T D B 332 // Each element 333 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 334 for (CeedInt e=0; e<num_elem; e++) { 335 CeedInt d_out = -1; 336 // Each basis eval mode pair 337 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 338 const CeedScalar *bt = NULL; 339 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 340 d_out += 1; 341 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 342 &grad_out[d_out*num_qpts*num_nodes], &bt); 343 CeedInt d_in = -1; 344 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 345 const CeedScalar *b = NULL; 346 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 347 d_in += 1; 348 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 349 &grad_in[d_in*num_qpts*num_nodes], &b); 350 // Each component 351 for (CeedInt c_out=0; c_out<num_comp; c_out++) 352 // Each qpoint/node pair 353 for (CeedInt q=0; q<num_qpts; q++) 354 if (is_pointblock) { 355 // Point Block Diagonal 356 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 357 const CeedScalar qf_value = 358 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 359 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 360 if (fabs(qf_value) > qf_value_bound) 361 for (CeedInt n=0; n<num_nodes; n++) 362 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 363 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 364 } 365 } else { 366 // Diagonal Only 367 const CeedScalar qf_value = 368 assembled_qf_array[q*layout[0] + 
(((e_in*num_comp+c_out)* 369 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 370 if (fabs(qf_value) > qf_value_bound) 371 for (CeedInt n=0; n<num_nodes; n++) 372 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 373 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 374 } 375 } 376 } 377 } 378 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 379 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 380 CeedChk(ierr); 381 382 // Assemble local operator diagonal 383 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 384 assembled, request); CeedChk(ierr); 385 386 // Cleanup 387 if (is_pointblock) { 388 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 389 } 390 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 391 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 392 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 393 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 394 ierr = CeedFree(&identity); CeedChk(ierr); 395 396 return CEED_ERROR_SUCCESS; 397 } 398 399 /** 400 @brief Core logic for assembling composite operator diagonal 401 402 @param[in] op CeedOperator to assemble point block diagonal 403 @param[in] request Address of CeedRequest for non-blocking completion, else 404 CEED_REQUEST_IMMEDIATE 405 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 406 @param[out] assembled CeedVector to store assembled diagonal 407 408 @return An error code: 0 - success, otherwise - failure 409 410 @ref Developer 411 **/ 412 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 413 CeedOperator op, CeedRequest *request, const bool is_pointblock, 414 CeedVector assembled) { 415 int ierr; 416 CeedInt num_sub; 417 CeedOperator *suboperators; 418 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 419 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 420 for (CeedInt i = 0; i < num_sub; i++) { 421 ierr = 
CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 422 is_pointblock, assembled); CeedChk(ierr); 423 } 424 return CEED_ERROR_SUCCESS; 425 } 426 427 /** 428 @brief Build nonzero pattern for non-composite operator 429 430 Users should generally use CeedOperatorLinearAssembleSymbolic() 431 432 @param[in] op CeedOperator to assemble nonzero pattern 433 @param[in] offset Offset for number of entries 434 @param[out] rows Row number for each entry 435 @param[out] cols Column number for each entry 436 437 @return An error code: 0 - success, otherwise - failure 438 439 @ref Developer 440 **/ 441 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 442 CeedInt *rows, CeedInt *cols) { 443 int ierr; 444 Ceed ceed = op->ceed; 445 if (op->is_composite) 446 // LCOV_EXCL_START 447 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 448 "Composite operator not supported"); 449 // LCOV_EXCL_STOP 450 451 CeedSize num_nodes; 452 ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr); 453 CeedElemRestriction rstr_in; 454 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 455 CeedInt num_elem, elem_size, num_comp; 456 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 457 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 458 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 459 CeedInt layout_er[3]; 460 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 461 462 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 463 464 // Determine elem_dof relation 465 CeedVector index_vec; 466 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 467 CeedScalar *array; 468 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 469 for (CeedInt i = 0; i < num_nodes; ++i) { 470 array[i] = i; 471 } 472 ierr = CeedVectorRestoreArray(index_vec, &array); 
CeedChk(ierr); 473 CeedVector elem_dof; 474 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 475 CeedChk(ierr); 476 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 477 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 478 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 479 const CeedScalar *elem_dof_a; 480 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 481 CeedChk(ierr); 482 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 483 484 // Determine i, j locations for element matrices 485 CeedInt count = 0; 486 for (CeedInt e = 0; e < num_elem; ++e) { 487 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 488 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 489 for (CeedInt i = 0; i < elem_size; ++i) { 490 for (CeedInt j = 0; j < elem_size; ++j) { 491 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 492 (comp_out)*layout_er[1] + e*layout_er[2]; 493 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 494 (comp_in)*layout_er[1] + e*layout_er[2]; 495 496 const CeedInt row = elem_dof_a[elem_dof_index_row]; 497 const CeedInt col = elem_dof_a[elem_dof_index_col]; 498 499 rows[offset + count] = row; 500 cols[offset + count] = col; 501 count++; 502 } 503 } 504 } 505 } 506 } 507 if (count != local_num_entries) 508 // LCOV_EXCL_START 509 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 510 // LCOV_EXCL_STOP 511 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 512 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 513 514 return CEED_ERROR_SUCCESS; 515 } 516 517 /** 518 @brief Assemble nonzero entries for non-composite operator 519 520 Users should generally use CeedOperatorLinearAssemble() 521 522 @param[in] op CeedOperator to assemble 523 @param[out] offset Offest for number of entries 524 @param[out] values Values to assemble into matrix 525 526 @return An error code: 0 - success, otherwise - failure 527 528 @ref Developer 
529 **/ 530 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, 531 CeedVector values) { 532 int ierr; 533 Ceed ceed = op->ceed; 534 if (op->is_composite) 535 // LCOV_EXCL_START 536 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 537 "Composite operator not supported"); 538 // LCOV_EXCL_STOP 539 540 // Assemble QFunction 541 CeedQFunction qf; 542 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 543 CeedVector assembled_qf; 544 CeedElemRestriction rstr_q; 545 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate( 546 op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 547 548 CeedSize qf_length; 549 ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr); 550 551 CeedInt num_input_fields, num_output_fields; 552 CeedOperatorField *input_fields; 553 CeedOperatorField *output_fields; 554 ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields, 555 &num_output_fields, &output_fields); CeedChk(ierr); 556 557 // Determine active input basis 558 CeedQFunctionField *qf_fields; 559 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 560 CeedInt num_eval_mode_in = 0, dim = 1; 561 CeedEvalMode *eval_mode_in = NULL; 562 CeedBasis basis_in = NULL; 563 CeedElemRestriction rstr_in = NULL; 564 for (CeedInt i=0; i<num_input_fields; i++) { 565 CeedVector vec; 566 ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr); 567 if (vec == CEED_VECTOR_ACTIVE) { 568 ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in); 569 CeedChk(ierr); 570 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 571 ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in); 572 CeedChk(ierr); 573 CeedEvalMode eval_mode; 574 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 575 CeedChk(ierr); 576 switch (eval_mode) { 577 case CEED_EVAL_NONE: 578 case CEED_EVAL_INTERP: 579 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 580 
eval_mode_in[num_eval_mode_in] = eval_mode; 581 num_eval_mode_in += 1; 582 break; 583 case CEED_EVAL_GRAD: 584 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 585 for (CeedInt d=0; d<dim; d++) { 586 eval_mode_in[num_eval_mode_in+d] = eval_mode; 587 } 588 num_eval_mode_in += dim; 589 break; 590 case CEED_EVAL_WEIGHT: 591 case CEED_EVAL_DIV: 592 case CEED_EVAL_CURL: 593 break; // Caught by QF Assembly 594 } 595 } 596 } 597 598 // Determine active output basis 599 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 600 CeedInt num_eval_mode_out = 0; 601 CeedEvalMode *eval_mode_out = NULL; 602 CeedBasis basis_out = NULL; 603 CeedElemRestriction rstr_out = NULL; 604 for (CeedInt i=0; i<num_output_fields; i++) { 605 CeedVector vec; 606 ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr); 607 if (vec == CEED_VECTOR_ACTIVE) { 608 ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out); 609 CeedChk(ierr); 610 ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out); 611 CeedChk(ierr); 612 CeedEvalMode eval_mode; 613 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 614 CeedChk(ierr); 615 switch (eval_mode) { 616 case CEED_EVAL_NONE: 617 case CEED_EVAL_INTERP: 618 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 619 eval_mode_out[num_eval_mode_out] = eval_mode; 620 num_eval_mode_out += 1; 621 break; 622 case CEED_EVAL_GRAD: 623 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 624 for (CeedInt d=0; d<dim; d++) { 625 eval_mode_out[num_eval_mode_out+d] = eval_mode; 626 } 627 num_eval_mode_out += dim; 628 break; 629 case CEED_EVAL_WEIGHT: 630 case CEED_EVAL_DIV: 631 case CEED_EVAL_CURL: 632 break; // Caught by QF Assembly 633 } 634 } 635 } 636 637 if (num_eval_mode_in == 0 || num_eval_mode_out == 0) 638 // LCOV_EXCL_START 639 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 640 "Cannot assemble operator with out 
inputs/outputs"); 641 // LCOV_EXCL_STOP 642 643 CeedInt num_elem, elem_size, num_qpts, num_comp; 644 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 645 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 646 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 647 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 648 649 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 650 651 // loop over elements and put in data structure 652 const CeedScalar *interp_in, *grad_in; 653 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 654 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 655 656 const CeedScalar *assembled_qf_array; 657 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 658 CeedChk(ierr); 659 660 CeedInt layout_qf[3]; 661 ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr); 662 ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); 663 664 // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order 665 CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size]; 666 CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size]; 667 CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in * 668 num_qpts]; // logically 3-tensor 669 CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in]; 670 CeedScalar elem_mat[elem_size * elem_size]; 671 CeedInt count = 0; 672 CeedScalar *vals; 673 ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); 674 for (CeedInt e = 0; e < num_elem; ++e) { 675 for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) { 676 for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) { 677 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) { 678 B_mat_in[ell] = 0.0; 679 } 680 for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) { 681 B_mat_out[ell] = 
0.0; 682 } 683 // Store block-diagonal D matrix as collection of small dense blocks 684 for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) { 685 D_mat[ell] = 0.0; 686 } 687 // form element matrix itself (for each block component) 688 for (CeedInt ell = 0; ell < elem_size*elem_size; ++ell) { 689 elem_mat[ell] = 0.0; 690 } 691 for (CeedInt q = 0; q < num_qpts; ++q) { 692 for (CeedInt n = 0; n < elem_size; ++n) { 693 CeedInt d_in = -1; 694 for (CeedInt e_in = 0; e_in < num_eval_mode_in; ++e_in) { 695 const CeedInt qq = num_eval_mode_in*q; 696 if (eval_mode_in[e_in] == CEED_EVAL_INTERP) { 697 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n]; 698 } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) { 699 d_in += 1; 700 B_mat_in[(qq+e_in)*elem_size + n] += 701 grad_in[(d_in*num_qpts+q) * elem_size + n]; 702 } else { 703 // LCOV_EXCL_START 704 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 705 // LCOV_EXCL_STOP 706 } 707 } 708 CeedInt d_out = -1; 709 for (CeedInt e_out = 0; e_out < num_eval_mode_out; ++e_out) { 710 const CeedInt qq = num_eval_mode_out*q; 711 if (eval_mode_out[e_out] == CEED_EVAL_INTERP) { 712 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n]; 713 } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) { 714 d_out += 1; 715 B_mat_out[(qq+e_out)*elem_size + n] += 716 grad_in[(d_out*num_qpts+q) * elem_size + n]; 717 } else { 718 // LCOV_EXCL_START 719 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!"); 720 // LCOV_EXCL_STOP 721 } 722 } 723 } 724 for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) { 725 for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) { 726 const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp 727 +comp_out; 728 const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + 729 e*layout_qf[2]; 730 D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index]; 731 } 732 } 733 } 734 // Compute B^T*D 735 
for (CeedInt ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) { 736 BTD[ell] = 0.0; 737 } 738 for (CeedInt j = 0; j<elem_size; ++j) { 739 for (CeedInt q = 0; q<num_qpts; ++q) { 740 const CeedInt qq = num_eval_mode_out*q; 741 for (CeedInt ei = 0; ei < num_eval_mode_in; ++ei) { 742 for (CeedInt ej = 0; ej < num_eval_mode_out; ++ej) { 743 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] += 744 B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q]; 745 } 746 } 747 } 748 } 749 750 ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, 751 elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); 752 753 // put element matrix in coordinate data structure 754 for (CeedInt i = 0; i < elem_size; ++i) { 755 for (CeedInt j = 0; j < elem_size; ++j) { 756 vals[offset + count] = elem_mat[i*elem_size + j]; 757 count++; 758 } 759 } 760 } 761 } 762 } 763 if (count != local_num_entries) 764 // LCOV_EXCL_START 765 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries"); 766 // LCOV_EXCL_STOP 767 ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr); 768 769 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 770 CeedChk(ierr); 771 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 772 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 773 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 774 775 return CEED_ERROR_SUCCESS; 776 } 777 778 /** 779 @brief Count number of entries for assembled CeedOperator 780 781 @param[in] op CeedOperator to assemble 782 @param[out] num_entries Number of entries in assembled representation 783 784 @return An error code: 0 - success, otherwise - failure 785 786 @ref Utility 787 **/ 788 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, 789 CeedInt *num_entries) { 790 int ierr; 791 CeedElemRestriction rstr; 792 CeedInt num_elem, elem_size, num_comp; 793 794 if (op->is_composite) 795 // LCOV_EXCL_START 796 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 797 
"Composite operator not supported"); 798 // LCOV_EXCL_STOP 799 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr); 800 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 801 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 802 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 803 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 804 805 return CEED_ERROR_SUCCESS; 806 } 807 808 /** 809 @brief Common code for creating a multigrid coarse operator and level 810 transfer operators for a CeedOperator 811 812 @param[in] op_fine Fine grid operator 813 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 814 @param[in] rstr_coarse Coarse grid restriction 815 @param[in] basis_coarse Coarse grid active vector basis 816 @param[in] basis_c_to_f Basis for coarse to fine interpolation 817 @param[out] op_coarse Coarse grid operator 818 @param[out] op_prolong Coarse to fine operator 819 @param[out] op_restrict Fine to coarse operator 820 821 @return An error code: 0 - success, otherwise - failure 822 823 @ref Developer 824 **/ 825 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 826 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 827 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 828 CeedOperator *op_restrict) { 829 int ierr; 830 Ceed ceed; 831 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 832 833 // Check for composite operator 834 bool is_composite; 835 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 836 if (is_composite) 837 // LCOV_EXCL_START 838 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 839 "Automatic multigrid setup for composite operators not supported"); 840 // LCOV_EXCL_STOP 841 842 // Coarse Grid 843 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 844 op_coarse); CeedChk(ierr); 845 CeedElemRestriction 
rstr_fine = NULL; 846 // -- Clone input fields 847 for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) { 848 if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 849 rstr_fine = op_fine->input_fields[i]->elem_restr; 850 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 851 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 852 CeedChk(ierr); 853 } else { 854 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 855 op_fine->input_fields[i]->elem_restr, 856 op_fine->input_fields[i]->basis, 857 op_fine->input_fields[i]->vec); CeedChk(ierr); 858 } 859 } 860 // -- Clone output fields 861 for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) { 862 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 863 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 864 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 865 CeedChk(ierr); 866 } else { 867 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 868 op_fine->output_fields[i]->elem_restr, 869 op_fine->output_fields[i]->basis, 870 op_fine->output_fields[i]->vec); CeedChk(ierr); 871 } 872 } 873 // -- Clone QFunctionAssemblyData 874 ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, 875 &(*op_coarse)->qf_assembled); CeedChk(ierr); 876 877 // Multiplicity vector 878 CeedVector mult_vec, mult_e_vec; 879 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 880 CeedChk(ierr); 881 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 882 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 883 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 884 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 885 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 886 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 887 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 888 ierr = CeedVectorReciprocal(mult_vec); 
CeedChk(ierr); 889 890 // Restriction 891 CeedInt num_comp; 892 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr); 893 CeedQFunction qf_restrict; 894 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 895 CeedChk(ierr); 896 CeedInt *num_comp_r_data; 897 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 898 num_comp_r_data[0] = num_comp; 899 CeedQFunctionContext ctx_r; 900 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 901 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 902 sizeof(*num_comp_r_data), num_comp_r_data); 903 CeedChk(ierr); 904 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 905 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 906 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 907 CeedChk(ierr); 908 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 909 CeedChk(ierr); 910 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 911 CEED_EVAL_INTERP); CeedChk(ierr); 912 ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr); 913 914 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 915 CEED_QFUNCTION_NONE, op_restrict); 916 CeedChk(ierr); 917 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 918 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 919 CeedChk(ierr); 920 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 921 CEED_BASIS_COLLOCATED, mult_vec); 922 CeedChk(ierr); 923 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 924 CEED_VECTOR_ACTIVE); CeedChk(ierr); 925 926 // Prolongation 927 CeedQFunction qf_prolong; 928 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 929 CeedChk(ierr); 930 CeedInt *num_comp_p_data; 931 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 932 num_comp_p_data[0] = num_comp; 933 CeedQFunctionContext ctx_p; 934 ierr = 
CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 935 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 936 sizeof(*num_comp_p_data), num_comp_p_data); 937 CeedChk(ierr); 938 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 939 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 940 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 941 CeedChk(ierr); 942 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 943 CeedChk(ierr); 944 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 945 CeedChk(ierr); 946 ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr); 947 948 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 949 CEED_QFUNCTION_NONE, op_prolong); 950 CeedChk(ierr); 951 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 952 CEED_VECTOR_ACTIVE); CeedChk(ierr); 953 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 954 CEED_BASIS_COLLOCATED, mult_vec); 955 CeedChk(ierr); 956 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 957 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 958 CeedChk(ierr); 959 960 // Clone name 961 bool has_name = op_fine->name; 962 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 963 ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr); 964 { 965 char *prolongation_name; 966 ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr); 967 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", 968 op_fine->name); 969 ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr); 970 ierr = CeedFree(&prolongation_name); CeedChk(ierr); 971 } 972 { 973 char *restriction_name; 974 ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr); 975 sprintf(restriction_name, "restriction%s%s", has_name ? 
" for " : "", 976 op_fine->name); 977 ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr); 978 ierr = CeedFree(&restriction_name); CeedChk(ierr); 979 } 980 981 // Cleanup 982 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 983 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 984 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 985 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 986 987 return CEED_ERROR_SUCCESS; 988 } 989 990 /** 991 @brief Build 1D mass matrix and Laplacian with perturbation 992 993 @param[in] interp_1d Interpolation matrix in one dimension 994 @param[in] grad_1d Gradient matrix in one dimension 995 @param[in] q_weight_1d Quadrature weights in one dimension 996 @param[in] P_1d Number of basis nodes in one dimension 997 @param[in] Q_1d Number of quadrature points in one dimension 998 @param[in] dim Dimension of basis 999 @param[out] mass Assembled mass matrix in one dimension 1000 @param[out] laplace Assembled perturbed Laplacian in one dimension 1001 1002 @return An error code: 0 - success, otherwise - failure 1003 1004 @ref Developer 1005 **/ 1006 CeedPragmaOptimizeOff 1007 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 1008 const CeedScalar *grad_1d, 1009 const CeedScalar *q_weight_1d, CeedInt P_1d, 1010 CeedInt Q_1d, CeedInt dim, 1011 CeedScalar *mass, CeedScalar *laplace) { 1012 1013 for (CeedInt i=0; i<P_1d; i++) 1014 for (CeedInt j=0; j<P_1d; j++) { 1015 CeedScalar sum = 0.0; 1016 for (CeedInt k=0; k<Q_1d; k++) 1017 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 1018 mass[i+j*P_1d] = sum; 1019 } 1020 // -- Laplacian 1021 for (CeedInt i=0; i<P_1d; i++) 1022 for (CeedInt j=0; j<P_1d; j++) { 1023 CeedScalar sum = 0.0; 1024 for (CeedInt k=0; k<Q_1d; k++) 1025 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1026 laplace[i+j*P_1d] = sum; 1027 } 1028 CeedScalar perturbation = dim>2 ? 
1e-6 : 1e-4; 1029 for (CeedInt i=0; i<P_1d; i++) 1030 laplace[i+P_1d*i] += perturbation; 1031 return CEED_ERROR_SUCCESS; 1032 } 1033 CeedPragmaOptimizeOn 1034 1035 /// @} 1036 1037 /// ---------------------------------------------------------------------------- 1038 /// CeedOperator Backend API 1039 /// ---------------------------------------------------------------------------- 1040 /// @addtogroup CeedOperatorBackend 1041 /// @{ 1042 1043 /** 1044 @brief Create object holding CeedQFunction assembly data for CeedOperator 1045 1046 @param[in] ceed A Ceed object where the CeedQFunctionAssemblyData will be created 1047 @param[out] data Address of the variable where the newly created 1048 CeedQFunctionAssemblyData will be stored 1049 1050 @return An error code: 0 - success, otherwise - failure 1051 1052 @ref Backend 1053 **/ 1054 int CeedQFunctionAssemblyDataCreate(Ceed ceed, 1055 CeedQFunctionAssemblyData *data) { 1056 int ierr; 1057 1058 ierr = CeedCalloc(1, data); CeedChk(ierr); 1059 (*data)->ref_count = 1; 1060 (*data)->ceed = ceed; 1061 ierr = CeedReference(ceed); CeedChk(ierr); 1062 1063 return CEED_ERROR_SUCCESS; 1064 } 1065 1066 /** 1067 @brief Increment the reference counter for a CeedQFunctionAssemblyData 1068 1069 @param data CeedQFunctionAssemblyData to increment the reference counter 1070 1071 @return An error code: 0 - success, otherwise - failure 1072 1073 @ref Backend 1074 **/ 1075 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) { 1076 data->ref_count++; 1077 return CEED_ERROR_SUCCESS; 1078 } 1079 1080 /** 1081 @brief Set re-use of CeedQFunctionAssemblyData 1082 1083 @param data CeedQFunctionAssemblyData to mark for reuse 1084 @param reuse_data Boolean flag indicating data re-use 1085 1086 @return An error code: 0 - success, otherwise - failure 1087 1088 @ref Backend 1089 **/ 1090 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, 1091 bool reuse_data) { 1092 data->reuse_data = reuse_data; 1093 
  // Any stored assembly data must be refreshed before next use
  data->needs_data_update = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Mark QFunctionAssemblyData as stale

  @param data              CeedQFunctionAssemblyData to mark as stale
  @param needs_data_update Boolean flag indicating if update is needed or completed

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,
    bool needs_data_update) {
  data->needs_data_update = needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Determine if QFunctionAssemblyData needs update

  @param[in]  data             CeedQFunctionAssemblyData to check
  @param[out] is_update_needed Boolean flag indicating if re-assembly is required

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
    bool *is_update_needed) {
  // Re-assembly is required when re-use is disabled or an update was requested
  *is_update_needed = !data->reuse_data || data->needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
           be destroyed with `CeedQFunctionAssemblyDataDestroy()`;
         Note: If `*data_copy` is non-NULL, then it is assumed that
           `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
           CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
           the only reference to this CeedQFunctionAssemblyData.

  @param data            CeedQFunctionAssemblyData to copy reference to
  @param[out] data_copy  Variable to store copied reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
    CeedQFunctionAssemblyData *data_copy) {
  int ierr;

  // Increment first, then release the old reference held in *data_copy
  ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
  ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
  *data_copy = data;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get setup status for internal objects for CeedQFunctionAssemblyData

  @param[in]  data     CeedQFunctionAssemblyData to retrieve status
  @param[out] is_setup Boolean flag for setup status

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
    bool *is_setup) {
  *is_setup = data->is_setup;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set internal objects for CeedQFunctionAssemblyData

  @param[in] data CeedQFunctionAssemblyData to set objects
  @param[in] vec  CeedVector to store assembled CeedQFunction at quadrature points
  @param[in] rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
    CeedVector vec, CeedElemRestriction rstr) {
  int ierr;

  // Hold references to the assembled vector and restriction
  ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);

  data->is_setup = true;
  return CEED_ERROR_SUCCESS;
}

// NOTE(review): doc comment missing in original -- returns copied references
// to the internally stored objects; caller is responsible for destroying them
int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
    CeedVector *vec, CeedElemRestriction *rstr) {
  int ierr;
  // Objects must have been provided via CeedQFunctionAssemblyDataSetObjects
  if (!data->is_setup)
    // LCOV_EXCL_START
    return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
                     "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
  // LCOV_EXCL_STOP

  ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Destroy CeedQFunctionAssemblyData

  @param[out] data CeedQFunctionAssemblyData to destroy

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
  int ierr;

  // Decrement the reference count; only free on the last reference
  if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;

  ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
  ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);

  ierr = CeedFree(data); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
    For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
    'v', provided in that order, would result in an assembled QFunction that
    consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
    on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedQFunction at
                          quadrature points
  @param[out] rstr      CeedElemRestriction for CeedVector containing assembled
                          CeedQFunction
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
                                        CeedElemRestriction *rstr,
                                        CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunction) {
    ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
    CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
           rstr, request); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble CeedQFunction and store result internally. Return copied
           references of stored data to the caller. Caller is responsible for
           ownership and destruction of the copied references.
         See also @ref CeedOperatorLinearAssembleQFunction

  @param op        CeedOperator to assemble CeedQFunction
  @param assembled CeedVector to store assembled CeedQFunction at
                     quadrature points
  @param rstr      CeedElemRestriction for CeedVector containing assembled
                     CeedQFunction
  @param request   Address of CeedRequest for non-blocking completion, else
                     @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    bool qf_assembled_is_setup;
    CeedVector assembled_vec = NULL;
    CeedElemRestriction assembled_rstr = NULL;

    ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
                                            &qf_assembled_is_setup); CeedChk(ierr);
    if (qf_assembled_is_setup) {
      // Re-use previously assembled objects, updating values only when stale
      ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
             &assembled_rstr); CeedChk(ierr);

      bool update_needed;
      ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
             &update_needed); CeedChk(ierr);
      if (update_needed) {
        ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
               request); CeedChk(ierr);
      }
    } else {
      // First assembly: build the objects and store them internally
      // NOTE(review): assumes backends providing LinearAssembleQFunctionUpdate
      // also provide LinearAssembleQFunction -- confirm this invariant
      ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
             request); CeedChk(ierr);
      ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
             assembled_rstr); CeedChk(ierr);
    }
    ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
    CeedChk(ierr);

    // Copy reference to internally held copy
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
    ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
    ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Diagonal assembly only makes sense for square operators
  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Zero, then sum into the vector with the backend's additive version
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator diagonal
  @param request        Address of CeedRequest for non-blocking completion, else
                          @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
                                          CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Diagonal assembly only makes sense for square operators
  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
  @param request Address of CeedRequest for non-blocking completion, else
                   CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Point block diagonal assembly only makes sense for square operators
  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Zero, then sum into the vector via the additive interface
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op             CeedOperator to assemble CeedQFunction
  @param[out] assembled CeedVector to store assembled CeedOperator point block
                          diagonal, provided in row-major form with an
                          @a num_comp * @a num_comp block at each node. The dimensions
                          of this vector are derived from the active vector
                          for the CeedOperator. The array has shape
                          [nodes, component out, component in].
  @param request       Address of CeedRequest for non-blocking completion, else
                         CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Point block diagonal assembly only makes sense for square operators
  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
const char *resource, *fallback_resource; 1617 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1618 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1619 CeedChk(ierr); 1620 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1621 // Fallback to reference Ceed 1622 if (!op->op_fallback) { 1623 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1624 } 1625 // Assemble 1626 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1627 assembled, request); CeedChk(ierr); 1628 return CEED_ERROR_SUCCESS; 1629 } 1630 } 1631 1632 // Default interface implemenation 1633 bool is_composite; 1634 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1635 if (is_composite) { 1636 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1637 true, assembled); CeedChk(ierr); 1638 return CEED_ERROR_SUCCESS; 1639 } else { 1640 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1641 CeedChk(ierr); 1642 return CEED_ERROR_SUCCESS; 1643 } 1644 } 1645 1646 /** 1647 @brief Fully assemble the nonzero pattern of a linear operator. 1648 1649 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1650 1651 The assembly routines use coordinate format, with num_entries tuples of the 1652 form (i, j, value) which indicate that value should be added to the matrix 1653 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1654 This function returns the number of entries and their (i, j) locations, 1655 while CeedOperatorLinearAssemble() provides the values in the same 1656 ordering. 1657 1658 This will generally be slow unless your operator is low-order. 1659 1660 Note: Calling this function asserts that setup is complete 1661 and sets the CeedOperator as immutable. 

  @param[in] op           CeedOperator to assemble
  @param[out] num_entries Number of entries in coordinate nonzero pattern
  @param[out] rows        Row number for each entry
  @param[out] cols        Column number for each entry

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  // Assert setup is complete; this also marks the CeedOperator immutable
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    // Only fall back when a distinct fallback resource is configured
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // Pass 1: count entries and allocate rows, cols arrays
  // (rows/cols are allocated here; ownership passes to the caller)
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (CeedInt k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // Pass 2: assemble nonzero locations, each sub-operator writing at its offset
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (CeedInt k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      // Re-count to advance the offset; ordering must match the count pass
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Fully assemble the nonzero entries of a linear operator.

  Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()

  The assembly routines use coordinate format, with num_entries tuples of the
    form (i, j, value) which indicate that value should be added to the matrix
    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
    This function returns the values of the nonzero entries to be added, their
    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()

  This will generally be slow unless your operator is low-order.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
1757 1758 @param[in] op CeedOperator to assemble 1759 @param[out] values Values to assemble into matrix 1760 1761 @ref User 1762 **/ 1763 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { 1764 int ierr; 1765 CeedInt num_suboperators, single_entries = 0; 1766 CeedOperator *sub_operators; 1767 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1768 1769 // Use backend version, if available 1770 if (op->LinearAssemble) { 1771 ierr = op->LinearAssemble(op, values); CeedChk(ierr); 1772 return CEED_ERROR_SUCCESS; 1773 } else { 1774 // Check for valid fallback resource 1775 const char *resource, *fallback_resource; 1776 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1777 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1778 CeedChk(ierr); 1779 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1780 // Fallback to reference Ceed 1781 if (!op->op_fallback) { 1782 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1783 } 1784 // Assemble 1785 ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); 1786 return CEED_ERROR_SUCCESS; 1787 } 1788 } 1789 1790 // Default interface implementation 1791 bool is_composite; 1792 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1793 1794 CeedInt offset = 0; 1795 if (is_composite) { 1796 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1797 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1798 for (CeedInt k = 0; k < num_suboperators; ++k) { 1799 ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); 1800 CeedChk(ierr); 1801 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1802 &single_entries); 1803 CeedChk(ierr); 1804 offset += single_entries; 1805 } 1806 } else { 1807 ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr); 1808 } 1809 1810 return CEED_ERROR_SUCCESS; 1811 } 1812 1813 /** 1814 @brief Create a multigrid coarse operator and level 
transfer operators
    for a CeedOperator, creating the prolongation basis from the
    fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  // Assert setup is complete; this also marks the CeedOperator immutable
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces: fine and coarse grids must share
  // the same quadrature points for the transfer construction below
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // For tensor bases the transfer is built in 1D; for non-tensor bases it is
  // built on the full element node sets
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work buffers: copies of the interpolation matrices (the QR factorization
  // below overwrites interp_f in place)
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Solve interp_f @ interp_c_to_f = interp_c in the least-squares sense:
  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c (back substitution on the
  //    upper-triangular R stored in interp_f)
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
    for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
    CeedOperator *op_prolong, CeedOperator *op_restrict) {
  int ierr;
  // Assert setup is complete; this also marks the CeedOperator immutable
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  // Recover the 1D node count from the total element size
  // NOTE(review): the sqrt/cbrt result is implicitly truncated to CeedInt;
  // assumes num_nodes_c is an exact dim-th power — consider round() to guard
  // against floating-point results just below the integer (TODO confirm)
  P_1d_c = dim == 1 ? num_nodes_c :
           dim == 2 ? sqrt(num_nodes_c) :
           cbrt(num_nodes_c);
  // The coarse-to-fine basis only interpolates; grad/q_ref/q_weight are
  // zero-filled placeholders
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
                                 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
    for a CeedOperator with a non-tensor basis for the active vector

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
                                       CeedVector p_mult_fine,
                                       CeedElemRestriction rstr_coarse,
                                       CeedBasis basis_coarse,
                                       const CeedScalar *interp_c_to_f,
                                       CeedOperator *op_coarse,
                                       CeedOperator *op_prolong,
                                       CeedOperator *op_restrict) {
  int ierr;
  // Assert setup is complete; this also marks the CeedOperator immutable
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis; the non-tensor path needs the element topology to
  // build the H1 basis
  CeedElemTopology topo;
  ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
  CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  // The coarse-to-fine basis only interpolates; grad/q_ref/q_weight are
  // zero-filled placeholders
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
                           interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Build a FDM based approximate inverse for each element for a
    CeedOperator

  This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
    Method based approximate inverse. This function obtains the simultaneous
    diagonalization for the 1D mass and Laplacian operators,
    M = V^T V, K = V^T S V.
    The assembled QFunction is used to modify the eigenvalues from simultaneous
    diagonalization and obtain an approximate inverse of the form
    V^T S^hat V. The CeedOperator must be linear and non-composite. The
    associated CeedQFunction must therefore also be linear.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
2085 2086 @param op CeedOperator to create element inverses 2087 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2088 for each element 2089 @param request Address of CeedRequest for non-blocking completion, else 2090 @ref CEED_REQUEST_IMMEDIATE 2091 2092 @return An error code: 0 - success, otherwise - failure 2093 2094 @ref User 2095 **/ 2096 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2097 CeedRequest *request) { 2098 int ierr; 2099 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2100 2101 // Use backend version, if available 2102 if (op->CreateFDMElementInverse) { 2103 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2104 return CEED_ERROR_SUCCESS; 2105 } else { 2106 // Check for valid fallback resource 2107 const char *resource, *fallback_resource; 2108 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2109 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2110 CeedChk(ierr); 2111 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2112 // Fallback to reference Ceed 2113 if (!op->op_fallback) { 2114 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2115 } 2116 // Assemble 2117 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2118 CeedChk(ierr); 2119 return CEED_ERROR_SUCCESS; 2120 } 2121 } 2122 2123 // Interface implementation 2124 Ceed ceed, ceed_parent; 2125 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2126 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2127 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2128 CeedQFunction qf; 2129 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2130 2131 // Determine active input basis 2132 bool interp = false, grad = false; 2133 CeedBasis basis = NULL; 2134 CeedElemRestriction rstr = NULL; 2135 CeedOperatorField *op_fields; 2136 CeedQFunctionField *qf_fields; 2137 CeedInt num_input_fields; 2138 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2139 CeedChk(ierr); 2140 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2141 for (CeedInt i=0; i<num_input_fields; i++) { 2142 CeedVector vec; 2143 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2144 if (vec == CEED_VECTOR_ACTIVE) { 2145 CeedEvalMode eval_mode; 2146 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2147 interp = interp || eval_mode == CEED_EVAL_INTERP; 2148 grad = grad || eval_mode == CEED_EVAL_GRAD; 2149 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2150 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2151 } 2152 } 2153 if (!basis) 2154 // LCOV_EXCL_START 2155 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2156 // LCOV_EXCL_STOP 2157 CeedSize l_size = 1; 2158 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2159 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2160 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2161 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2162 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2163 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2164 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2165 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2166 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2167 2168 // Build and diagonalize 1D Mass and Laplacian 2169 
bool tensor_basis; 2170 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2171 if (!tensor_basis) 2172 // LCOV_EXCL_START 2173 return CeedError(ceed, CEED_ERROR_BACKEND, 2174 "FDMElementInverse only supported for tensor " 2175 "bases"); 2176 // LCOV_EXCL_STOP 2177 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2178 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2179 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2180 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2181 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2182 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2183 // -- Build matrices 2184 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2185 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2186 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2187 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2188 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2189 mass, laplace); CeedChk(ierr); 2190 2191 // -- Diagonalize 2192 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2193 CeedChk(ierr); 2194 ierr = CeedFree(&mass); CeedChk(ierr); 2195 ierr = CeedFree(&laplace); CeedChk(ierr); 2196 for (CeedInt i=0; i<P_1d; i++) 2197 for (CeedInt j=0; j<P_1d; j++) 2198 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2199 ierr = CeedFree(&x); CeedChk(ierr); 2200 2201 // Assemble QFunction 2202 CeedVector assembled; 2203 CeedElemRestriction rstr_qf; 2204 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2205 &rstr_qf, request); CeedChk(ierr); 2206 CeedInt layout[3]; 2207 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2208 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2209 CeedScalar max_norm = 0; 2210 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2211 2212 // Calculate element averages 2213 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2214 CeedScalar 
*elem_avg; 2215 const CeedScalar *assembled_array, *q_weight_array; 2216 CeedVector q_weight; 2217 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2218 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2219 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2220 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2221 CeedChk(ierr); 2222 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2223 CeedChk(ierr); 2224 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2225 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2226 for (CeedInt e=0; e<num_elem; e++) { 2227 CeedInt count = 0; 2228 for (CeedInt q=0; q<num_qpts; q++) 2229 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2230 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2231 qf_value_bound) { 2232 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2233 q_weight_array[q]; 2234 count++; 2235 } 2236 if (count) { 2237 elem_avg[e] /= count; 2238 } else { 2239 elem_avg[e] = 1.0; 2240 } 2241 } 2242 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2243 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2244 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2245 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2246 2247 // Build FDM diagonal 2248 CeedVector q_data; 2249 CeedScalar *q_data_array, *fdm_diagonal; 2250 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2251 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2252 for (CeedInt c=0; c<num_comp; c++) 2253 for (CeedInt n=0; n<elem_size; n++) { 2254 if (interp) 2255 fdm_diagonal[c*elem_size + n] = 1.0; 2256 if (grad) 2257 for (CeedInt d=0; d<dim; d++) { 2258 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2259 fdm_diagonal[c*elem_size + n] += lambda[i]; 2260 } 2261 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2262 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2263 } 2264 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2265 CeedChk(ierr); 2266 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2267 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2268 CeedChk(ierr); 2269 for (CeedInt e=0; e<num_elem; e++) 2270 for (CeedInt c=0; c<num_comp; c++) 2271 for (CeedInt n=0; n<elem_size; n++) 2272 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2273 fdm_diagonal[c*elem_size + n]); 2274 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2275 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2276 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2277 2278 // Setup FDM operator 2279 // -- Basis 2280 CeedBasis fdm_basis; 2281 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2282 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2283 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2284 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2285 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2286 fdm_interp, grad_dummy, q_ref_dummy, 2287 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2288 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2289 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2290 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2291 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2292 ierr = CeedFree(&lambda); CeedChk(ierr); 2293 2294 // -- Restriction 2295 CeedElemRestriction rstr_qd_i; 2296 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2297 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2298 num_comp, num_elem*num_comp*elem_size, 2299 strides, &rstr_qd_i); CeedChk(ierr); 2300 // -- QFunction 2301 CeedQFunction qf_fdm; 2302 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2303 CeedChk(ierr); 2304 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2305 
CeedChk(ierr); 2306 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2307 CeedChk(ierr); 2308 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2309 CeedChk(ierr); 2310 ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr); 2311 // -- QFunction context 2312 CeedInt *num_comp_data; 2313 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2314 num_comp_data[0] = num_comp; 2315 CeedQFunctionContext ctx_fdm; 2316 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2317 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2318 sizeof(*num_comp_data), num_comp_data); 2319 CeedChk(ierr); 2320 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2321 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2322 // -- Operator 2323 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2324 CeedChk(ierr); 2325 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2326 CeedChk(ierr); 2327 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2328 q_data); CeedChk(ierr); 2329 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2330 CeedChk(ierr); 2331 2332 // Cleanup 2333 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2334 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2335 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2336 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2337 2338 return CEED_ERROR_SUCCESS; 2339 } 2340 2341 /// @} 2342