1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedOperator preconditioning interfaces 18 19 /// ---------------------------------------------------------------------------- 20 /// CeedOperator Library Internal Preconditioning Functions 21 /// ---------------------------------------------------------------------------- 22 /// @addtogroup CeedOperatorDeveloper 23 /// @{ 24 25 /** 26 @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced 27 CeedOperator functionality 28 29 @param op CeedOperator to create fallback for 30 31 @return An error code: 0 - success, otherwise - failure 32 33 @ref Developer 34 **/ 35 int CeedOperatorCreateFallback(CeedOperator op) { 36 int ierr; 37 38 // Fallback Ceed 39 const char *resource, *fallback_resource; 40 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 41 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 42 CeedChk(ierr); 43 if (!strcmp(resource, fallback_resource)) 44 // LCOV_EXCL_START 45 return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, 46 "Backend %s cannot create an operator" 47 "fallback to resource %s", resource, fallback_resource); 48 // LCOV_EXCL_STOP 49 50 // Fallback Ceed 51 Ceed ceed_ref; 52 if (!op->ceed->op_fallback_ceed) { 53 ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr); 54 ceed_ref->op_fallback_parent = op->ceed; 55 ceed_ref->Error = op->ceed->Error; 56 op->ceed->op_fallback_ceed = ceed_ref; 57 } 58 ceed_ref = op->ceed->op_fallback_ceed; 59 60 // Clone Op 61 CeedOperator op_ref; 62 ierr = 
CeedCalloc(1, &op_ref); CeedChk(ierr); 63 memcpy(op_ref, op, sizeof(*op_ref)); 64 op_ref->data = NULL; 65 op_ref->is_interface_setup = false; 66 op_ref->is_backend_setup = false; 67 op_ref->ceed = ceed_ref; 68 ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr); 69 ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, 70 &op_ref->qf_assembled); CeedChk(ierr); 71 ierr = CeedOperatorSetName(op_ref, op->name); CeedChk(ierr); 72 op->op_fallback = op_ref; 73 74 // Clone QF 75 CeedQFunction qf_ref; 76 ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr); 77 memcpy(qf_ref, (op->qf), sizeof(*qf_ref)); 78 qf_ref->data = NULL; 79 qf_ref->ceed = ceed_ref; 80 ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr); 81 op_ref->qf = qf_ref; 82 op->qf_fallback = qf_ref; 83 return CEED_ERROR_SUCCESS; 84 } 85 86 /** 87 @brief Select correct basis matrix pointer based on CeedEvalMode 88 89 @param[in] eval_mode Current basis evaluation mode 90 @param[in] identity Pointer to identity matrix 91 @param[in] interp Pointer to interpolation matrix 92 @param[in] grad Pointer to gradient matrix 93 @param[out] basis_ptr Basis pointer to set 94 95 @ref Developer 96 **/ 97 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, 98 const CeedScalar *identity, const CeedScalar *interp, 99 const CeedScalar *grad, const CeedScalar **basis_ptr) { 100 switch (eval_mode) { 101 case CEED_EVAL_NONE: 102 *basis_ptr = identity; 103 break; 104 case CEED_EVAL_INTERP: 105 *basis_ptr = interp; 106 break; 107 case CEED_EVAL_GRAD: 108 *basis_ptr = grad; 109 break; 110 case CEED_EVAL_WEIGHT: 111 case CEED_EVAL_DIV: 112 case CEED_EVAL_CURL: 113 break; // Caught by QF Assembly 114 } 115 } 116 117 /** 118 @brief Create point block restriction for active operator field 119 120 @param[in] rstr Original CeedElemRestriction for active field 121 @param[out] pointblock_rstr Address of the variable where the newly created 122 CeedElemRestriction will be stored 123 124 @return An error code: 0 - 
success, otherwise - failure

  @ref Developer
**/
static int CeedOperatorCreateActivePointBlockRestriction(
  CeedElemRestriction rstr,
  CeedElemRestriction *pointblock_rstr) {
  int ierr;
  Ceed ceed;
  ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
  const CeedInt *offsets;
  ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
  CeedChk(ierr);

  // Expand offsets - each scalar DoF becomes a num_comp x num_comp block,
  //   so every original offset is scaled by the block footprint
  CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
          *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // Interleaved components (comp_stride == 1) need a shift of num_comp;
  //   otherwise the offsets already stride past components and the shift is
  //   the full num_comp^2 block size
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  // Track the largest shifted offset to size the point block L-vector below
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction with num_comp^2 components (one per block entry),
  //   component stride 1; CEED_OWN_POINTER transfers ownership of
  //   pointblock_offsets to the new restriction (do not free it here)
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
  CeedChk(ierr);

  // Cleanup - return the borrowed offsets array to the original restriction
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return
An error code: 0 - success, otherwise - failure 178 179 @ref Developer 180 **/ 181 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, 182 CeedRequest *request, const bool is_pointblock, CeedVector assembled) { 183 int ierr; 184 Ceed ceed; 185 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 186 187 // Assemble QFunction 188 CeedQFunction qf; 189 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 190 CeedInt num_input_fields, num_output_fields; 191 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 192 CeedChk(ierr); 193 CeedVector assembled_qf; 194 CeedElemRestriction rstr; 195 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, 196 &rstr, request); CeedChk(ierr); 197 CeedInt layout[3]; 198 ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); 199 ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); 200 CeedScalar max_norm = 0; 201 ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 202 203 // Determine active input basis 204 CeedOperatorField *op_fields; 205 CeedQFunctionField *qf_fields; 206 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); 207 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 208 CeedInt num_eval_mode_in = 0, num_comp, dim = 1; 209 CeedEvalMode *eval_mode_in = NULL; 210 CeedBasis basis_in = NULL; 211 CeedElemRestriction rstr_in = NULL; 212 for (CeedInt i=0; i<num_input_fields; i++) { 213 CeedVector vec; 214 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 215 if (vec == CEED_VECTOR_ACTIVE) { 216 CeedElemRestriction rstr; 217 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr); 218 ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr); 219 ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr); 220 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 221 if (rstr_in && rstr_in != 
rstr) 222 // LCOV_EXCL_START 223 return CeedError(ceed, CEED_ERROR_BACKEND, 224 "Multi-field non-composite operator diagonal assembly not supported"); 225 // LCOV_EXCL_STOP 226 rstr_in = rstr; 227 CeedEvalMode eval_mode; 228 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 229 switch (eval_mode) { 230 case CEED_EVAL_NONE: 231 case CEED_EVAL_INTERP: 232 ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr); 233 eval_mode_in[num_eval_mode_in] = eval_mode; 234 num_eval_mode_in += 1; 235 break; 236 case CEED_EVAL_GRAD: 237 ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr); 238 for (CeedInt d=0; d<dim; d++) 239 eval_mode_in[num_eval_mode_in+d] = eval_mode; 240 num_eval_mode_in += dim; 241 break; 242 case CEED_EVAL_WEIGHT: 243 case CEED_EVAL_DIV: 244 case CEED_EVAL_CURL: 245 break; // Caught by QF Assembly 246 } 247 } 248 } 249 250 // Determine active output basis 251 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); 252 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr); 253 CeedInt num_eval_mode_out = 0; 254 CeedEvalMode *eval_mode_out = NULL; 255 CeedBasis basis_out = NULL; 256 CeedElemRestriction rstr_out = NULL; 257 for (CeedInt i=0; i<num_output_fields; i++) { 258 CeedVector vec; 259 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 260 if (vec == CEED_VECTOR_ACTIVE) { 261 CeedElemRestriction rstr; 262 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr); 263 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 264 if (rstr_out && rstr_out != rstr) 265 // LCOV_EXCL_START 266 return CeedError(ceed, CEED_ERROR_BACKEND, 267 "Multi-field non-composite operator diagonal assembly not supported"); 268 // LCOV_EXCL_STOP 269 rstr_out = rstr; 270 CeedEvalMode eval_mode; 271 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 272 switch (eval_mode) { 273 
case CEED_EVAL_NONE: 274 case CEED_EVAL_INTERP: 275 ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr); 276 eval_mode_out[num_eval_mode_out] = eval_mode; 277 num_eval_mode_out += 1; 278 break; 279 case CEED_EVAL_GRAD: 280 ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr); 281 for (CeedInt d=0; d<dim; d++) 282 eval_mode_out[num_eval_mode_out+d] = eval_mode; 283 num_eval_mode_out += dim; 284 break; 285 case CEED_EVAL_WEIGHT: 286 case CEED_EVAL_DIV: 287 case CEED_EVAL_CURL: 288 break; // Caught by QF Assembly 289 } 290 } 291 } 292 293 // Assemble point block diagonal restriction, if needed 294 CeedElemRestriction diag_rstr = rstr_out; 295 if (is_pointblock) { 296 ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr); 297 CeedChk(ierr); 298 } 299 300 // Create diagonal vector 301 CeedVector elem_diag; 302 ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag); 303 CeedChk(ierr); 304 305 // Assemble element operator diagonals 306 CeedScalar *elem_diag_array; 307 const CeedScalar *assembled_qf_array; 308 ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); 309 ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); 310 CeedChk(ierr); 311 ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); 312 CeedChk(ierr); 313 CeedInt num_elem, num_nodes, num_qpts; 314 ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); 315 ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr); 316 ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr); 317 // Basis matrices 318 const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out; 319 CeedScalar *identity = NULL; 320 bool evalNone = false; 321 for (CeedInt i=0; i<num_eval_mode_in; i++) 322 evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE); 323 for (CeedInt i=0; i<num_eval_mode_out; i++) 324 evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE); 
325 if (evalNone) { 326 ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr); 327 for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++) 328 identity[i*num_nodes+i] = 1.0; 329 } 330 ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr); 331 ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr); 332 ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr); 333 ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); 334 // Compute the diagonal of B^T D B 335 // Each element 336 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 337 for (CeedInt e=0; e<num_elem; e++) { 338 CeedInt d_out = -1; 339 // Each basis eval mode pair 340 for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) { 341 const CeedScalar *bt = NULL; 342 if (eval_mode_out[e_out] == CEED_EVAL_GRAD) 343 d_out += 1; 344 CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, 345 &grad_out[d_out*num_qpts*num_nodes], &bt); 346 CeedInt d_in = -1; 347 for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) { 348 const CeedScalar *b = NULL; 349 if (eval_mode_in[e_in] == CEED_EVAL_GRAD) 350 d_in += 1; 351 CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, 352 &grad_in[d_in*num_qpts*num_nodes], &b); 353 // Each component 354 for (CeedInt c_out=0; c_out<num_comp; c_out++) 355 // Each qpoint/node pair 356 for (CeedInt q=0; q<num_qpts; q++) 357 if (is_pointblock) { 358 // Point Block Diagonal 359 for (CeedInt c_in=0; c_in<num_comp; c_in++) { 360 const CeedScalar qf_value = 361 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)* 362 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 363 if (fabs(qf_value) > qf_value_bound) 364 for (CeedInt n=0; n<num_nodes; n++) 365 elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] += 366 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 367 } 368 } else { 369 // Diagonal Only 370 const CeedScalar qf_value = 371 assembled_qf_array[q*layout[0] + 
(((e_in*num_comp+c_out)* 372 num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]]; 373 if (fabs(qf_value) > qf_value_bound) 374 for (CeedInt n=0; n<num_nodes; n++) 375 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] += 376 bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n]; 377 } 378 } 379 } 380 } 381 ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); 382 ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); 383 CeedChk(ierr); 384 385 // Assemble local operator diagonal 386 ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, 387 assembled, request); CeedChk(ierr); 388 389 // Cleanup 390 if (is_pointblock) { 391 ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr); 392 } 393 ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); 394 ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr); 395 ierr = CeedFree(&eval_mode_in); CeedChk(ierr); 396 ierr = CeedFree(&eval_mode_out); CeedChk(ierr); 397 ierr = CeedFree(&identity); CeedChk(ierr); 398 399 return CEED_ERROR_SUCCESS; 400 } 401 402 /** 403 @brief Core logic for assembling composite operator diagonal 404 405 @param[in] op CeedOperator to assemble point block diagonal 406 @param[in] request Address of CeedRequest for non-blocking completion, else 407 CEED_REQUEST_IMMEDIATE 408 @param[in] is_pointblock Boolean flag to assemble diagonal or point block diagonal 409 @param[out] assembled CeedVector to store assembled diagonal 410 411 @return An error code: 0 - success, otherwise - failure 412 413 @ref Developer 414 **/ 415 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( 416 CeedOperator op, CeedRequest *request, const bool is_pointblock, 417 CeedVector assembled) { 418 int ierr; 419 CeedInt num_sub; 420 CeedOperator *suboperators; 421 ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); 422 ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); 423 for (CeedInt i = 0; i < num_sub; i++) { 424 ierr = 
CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, 425 is_pointblock, assembled); CeedChk(ierr); 426 } 427 return CEED_ERROR_SUCCESS; 428 } 429 430 /** 431 @brief Build nonzero pattern for non-composite operator 432 433 Users should generally use CeedOperatorLinearAssembleSymbolic() 434 435 @param[in] op CeedOperator to assemble nonzero pattern 436 @param[in] offset Offset for number of entries 437 @param[out] rows Row number for each entry 438 @param[out] cols Column number for each entry 439 440 @return An error code: 0 - success, otherwise - failure 441 442 @ref Developer 443 **/ 444 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, 445 CeedInt *rows, CeedInt *cols) { 446 int ierr; 447 Ceed ceed = op->ceed; 448 if (op->is_composite) 449 // LCOV_EXCL_START 450 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 451 "Composite operator not supported"); 452 // LCOV_EXCL_STOP 453 454 CeedSize num_nodes; 455 ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr); 456 CeedElemRestriction rstr_in; 457 ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr); 458 CeedInt num_elem, elem_size, num_comp; 459 ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr); 460 ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr); 461 ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr); 462 CeedInt layout_er[3]; 463 ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr); 464 465 CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 466 467 // Determine elem_dof relation 468 CeedVector index_vec; 469 ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); 470 CeedScalar *array; 471 ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); 472 for (CeedInt i = 0; i < num_nodes; ++i) { 473 array[i] = i; 474 } 475 ierr = CeedVectorRestoreArray(index_vec, &array); 
CeedChk(ierr); 476 CeedVector elem_dof; 477 ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof); 478 CeedChk(ierr); 479 ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr); 480 CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, 481 elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 482 const CeedScalar *elem_dof_a; 483 ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a); 484 CeedChk(ierr); 485 ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr); 486 487 // Determine i, j locations for element matrices 488 CeedInt count = 0; 489 for (int e = 0; e < num_elem; ++e) { 490 for (int comp_in = 0; comp_in < num_comp; ++comp_in) { 491 for (int comp_out = 0; comp_out < num_comp; ++comp_out) { 492 for (int i = 0; i < elem_size; ++i) { 493 for (int j = 0; j < elem_size; ++j) { 494 const CeedInt elem_dof_index_row = (i)*layout_er[0] + 495 (comp_out)*layout_er[1] + e*layout_er[2]; 496 const CeedInt elem_dof_index_col = (j)*layout_er[0] + 497 (comp_in)*layout_er[1] + e*layout_er[2]; 498 499 const CeedInt row = elem_dof_a[elem_dof_index_row]; 500 const CeedInt col = elem_dof_a[elem_dof_index_col]; 501 502 rows[offset + count] = row; 503 cols[offset + count] = col; 504 count++; 505 } 506 } 507 } 508 } 509 } 510 if (count != local_num_entries) 511 // LCOV_EXCL_START 512 return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries"); 513 // LCOV_EXCL_STOP 514 ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); 515 ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); 516 517 return CEED_ERROR_SUCCESS; 518 } 519 520 /** 521 @brief Assemble nonzero entries for non-composite operator 522 523 Users should generally use CeedOperatorLinearAssemble() 524 525 @param[in] op CeedOperator to assemble 526 @param[out] offset Offest for number of entries 527 @param[out] values Values to assemble into matrix 528 529 @return An error code: 0 - success, otherwise - failure 530 531 @ref Developer 532 **/ 533 static 
int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
                               CeedVector values) {
  int ierr;
  Ceed ceed = op->ceed;
  // Only non-composite operators are supported by this routine
  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP

  // Assemble QFunction at quadrature points
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr_q;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
           op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);

  // NOTE(review): qf_length is retrieved but never used below
  CeedSize qf_length;
  ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);

  CeedInt num_input_fields, num_output_fields;
  CeedOperatorField *input_fields;
  CeedOperatorField *output_fields;
  ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
                               &num_output_fields, &output_fields); CeedChk(ierr);

  // Determine active input basis; CEED_EVAL_GRAD contributes dim eval modes
  CeedQFunctionField *qf_fields;
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
      CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        }
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis, mirroring the input pass above
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
      CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
      CeedChk(ierr);
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
      CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++) {
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        }
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Cannot assemble operator with out inputs/outputs");
  // LCOV_EXCL_STOP

  CeedInt num_elem, elem_size, num_qpts, num_comp;
  ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);

  CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;

  // loop over elements and put in data structure
  const CeedScalar *interp_in, *grad_in;
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);

  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);

  CeedInt layout_qf[3];
  ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);

  // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
  // NOTE(review): these are VLAs on the stack; large elem_size/num_qpts could
  //   overflow the stack - confirm expected problem sizes
  CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
  CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
  CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
                                       num_qpts]; // logically 3-tensor
  CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
  CeedScalar elem_mat[elem_size * elem_size];
  int count = 0;
  CeedScalar *vals;
  ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
  for (int e = 0; e < num_elem; ++e) {
    for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
      for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
        // Reset per-component scratch matrices
        for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
          B_mat_in[ell] = 0.0;
        }
        for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
          B_mat_out[ell] = 0.0;
        }
        // Store block-diagonal D matrix as collection of small dense blocks
        for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
          D_mat[ell] = 0.0;
        }
        // form element matrix itself (for each block component)
        for (int ell = 0; ell < elem_size*elem_size; ++ell) {
          elem_mat[ell] = 0.0;
        }
        for (int q = 0; q < num_qpts; ++q) {
          for (int n = 0; n < elem_size; ++n) {
            CeedInt d_in = -1;
            for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
              const int qq = num_eval_mode_in*q;
              if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
                B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
                d_in += 1;
                B_mat_in[(qq+e_in)*elem_size + n] +=
                  grad_in[(d_in*num_qpts+q) * elem_size + n];
              } else {
                // NOTE(review): early return leaks the vals/assembled_qf_array
                //   host arrays - confirm whether this path is reachable
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
            CeedInt d_out = -1;
            for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
              const int qq = num_eval_mode_out*q;
              // NOTE(review): the output B matrix is built from interp_in and
              //   grad_in (not interp_out/grad_out from basis_out) - this
              //   assumes the active input and output bases match; confirm
              if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
                B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
              } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
                d_out += 1;
                B_mat_out[(qq+e_out)*elem_size + n] +=
                  grad_in[(d_out*num_qpts+q) * elem_size + n];
              } else {
                // LCOV_EXCL_START
                return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
                // LCOV_EXCL_STOP
              }
            }
          }
          // Gather this component pair's quadrature data into D_mat
          for (int ei = 0; ei < num_eval_mode_out; ++ei) {
            for (int ej = 0; ej < num_eval_mode_in; ++ej) {
              const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
                                          +comp_out;
              const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
                                e*layout_qf[2];
              D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
            }
          }
        }
        // Compute B^T*D
        for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
          BTD[ell] = 0.0;
        }
        for (int j = 0; j<elem_size; ++j) {
          for (int q = 0; q<num_qpts; ++q) {
            int qq = num_eval_mode_out*q;
            for (int ei = 0; ei < num_eval_mode_in; ++ei) {
              for (int ej = 0; ej < num_eval_mode_out; ++ej) {
                BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
                  B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
              }
            }
          }
        }

        // elem_mat = (B^T D) * B for this component pair
        ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
                                  elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);

        // put element matrix in coordinate data structure
        for (int i = 0; i < elem_size; ++i) {
          for (int j = 0; j < elem_size; ++j) {
            vals[offset + count] = elem_mat[i*elem_size + j];
            count++;
          }
        }
      }
    }
  }
  if (count != local_num_entries)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
  // LCOV_EXCL_STOP
  ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);

  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Count number of entries for assembled CeedOperator

  @param[in] op            CeedOperator to assemble
  @param[out] num_entries  Number of entries in assembled representation

  @return An error code: 0 - success, otherwise - failure

  @ref Utility
**/
static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
    CeedInt *num_entries) {
  int ierr;
  CeedElemRestriction rstr;
  CeedInt num_elem, elem_size, num_comp;

  if (op->is_composite)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
                     "Composite operator not supported");
  // LCOV_EXCL_STOP
  ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
  ierr =
CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 804 ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); 805 ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); 806 *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem; 807 808 return CEED_ERROR_SUCCESS; 809 } 810 811 /** 812 @brief Common code for creating a multigrid coarse operator and level 813 transfer operators for a CeedOperator 814 815 @param[in] op_fine Fine grid operator 816 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 817 @param[in] rstr_coarse Coarse grid restriction 818 @param[in] basis_coarse Coarse grid active vector basis 819 @param[in] basis_c_to_f Basis for coarse to fine interpolation 820 @param[out] op_coarse Coarse grid operator 821 @param[out] op_prolong Coarse to fine operator 822 @param[out] op_restrict Fine to coarse operator 823 824 @return An error code: 0 - success, otherwise - failure 825 826 @ref Developer 827 **/ 828 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, 829 CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, 830 CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, 831 CeedOperator *op_restrict) { 832 int ierr; 833 Ceed ceed; 834 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 835 836 // Check for composite operator 837 bool is_composite; 838 ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr); 839 if (is_composite) 840 // LCOV_EXCL_START 841 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 842 "Automatic multigrid setup for composite operators not supported"); 843 // LCOV_EXCL_STOP 844 845 // Coarse Grid 846 ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, 847 op_coarse); CeedChk(ierr); 848 CeedElemRestriction rstr_fine = NULL; 849 // -- Clone input fields 850 for (int i = 0; i < op_fine->qf->num_input_fields; i++) { 851 if 
(op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) { 852 rstr_fine = op_fine->input_fields[i]->elem_restr; 853 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 854 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 855 CeedChk(ierr); 856 } else { 857 ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, 858 op_fine->input_fields[i]->elem_restr, 859 op_fine->input_fields[i]->basis, 860 op_fine->input_fields[i]->vec); CeedChk(ierr); 861 } 862 } 863 // -- Clone output fields 864 for (int i = 0; i < op_fine->qf->num_output_fields; i++) { 865 if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) { 866 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 867 rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE); 868 CeedChk(ierr); 869 } else { 870 ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, 871 op_fine->output_fields[i]->elem_restr, 872 op_fine->output_fields[i]->basis, 873 op_fine->output_fields[i]->vec); CeedChk(ierr); 874 } 875 } 876 // -- Clone QFunctionAssemblyData 877 ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, 878 &(*op_coarse)->qf_assembled); CeedChk(ierr); 879 880 // Multiplicity vector 881 CeedVector mult_vec, mult_e_vec; 882 ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec); 883 CeedChk(ierr); 884 ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr); 885 ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, 886 mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 887 ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr); 888 ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, 889 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 890 ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr); 891 ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr); 892 893 // Restriction 894 CeedInt num_comp; 895 ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); 
CeedChk(ierr); 896 CeedQFunction qf_restrict; 897 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict); 898 CeedChk(ierr); 899 CeedInt *num_comp_r_data; 900 ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr); 901 num_comp_r_data[0] = num_comp; 902 CeedQFunctionContext ctx_r; 903 ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr); 904 ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, 905 sizeof(*num_comp_r_data), num_comp_r_data); 906 CeedChk(ierr); 907 ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr); 908 ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr); 909 ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE); 910 CeedChk(ierr); 911 ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE); 912 CeedChk(ierr); 913 ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp, 914 CEED_EVAL_INTERP); CeedChk(ierr); 915 ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr); 916 917 ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, 918 CEED_QFUNCTION_NONE, op_restrict); 919 CeedChk(ierr); 920 ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine, 921 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 922 CeedChk(ierr); 923 ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine, 924 CEED_BASIS_COLLOCATED, mult_vec); 925 CeedChk(ierr); 926 ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, 927 CEED_VECTOR_ACTIVE); CeedChk(ierr); 928 929 // Prolongation 930 CeedQFunction qf_prolong; 931 ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong); 932 CeedChk(ierr); 933 CeedInt *num_comp_p_data; 934 ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr); 935 num_comp_p_data[0] = num_comp; 936 CeedQFunctionContext ctx_p; 937 ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr); 938 ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, 939 
sizeof(*num_comp_p_data), num_comp_p_data); 940 CeedChk(ierr); 941 ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr); 942 ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr); 943 ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP); 944 CeedChk(ierr); 945 ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE); 946 CeedChk(ierr); 947 ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE); 948 CeedChk(ierr); 949 ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr); 950 951 ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, 952 CEED_QFUNCTION_NONE, op_prolong); 953 CeedChk(ierr); 954 ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, 955 CEED_VECTOR_ACTIVE); CeedChk(ierr); 956 ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine, 957 CEED_BASIS_COLLOCATED, mult_vec); 958 CeedChk(ierr); 959 ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine, 960 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 961 CeedChk(ierr); 962 963 // Clone name 964 bool has_name = op_fine->name; 965 size_t name_len = op_fine->name ? strlen(op_fine->name) : 0; 966 ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr); 967 { 968 char *prolongation_name; 969 ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr); 970 sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", 971 op_fine->name); 972 ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr); 973 ierr = CeedFree(&prolongation_name); CeedChk(ierr); 974 } 975 { 976 char *restriction_name; 977 ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr); 978 sprintf(restriction_name, "restriction%s%s", has_name ? 
" for " : "", 979 op_fine->name); 980 ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr); 981 ierr = CeedFree(&restriction_name); CeedChk(ierr); 982 } 983 984 // Cleanup 985 ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr); 986 ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr); 987 ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); 988 ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); 989 return CEED_ERROR_SUCCESS; 990 } 991 992 /** 993 @brief Build 1D mass matrix and Laplacian with perturbation 994 995 @param[in] interp_1d Interpolation matrix in one dimension 996 @param[in] grad_1d Gradient matrix in one dimension 997 @param[in] q_weight_1d Quadrature weights in one dimension 998 @param[in] P_1d Number of basis nodes in one dimension 999 @param[in] Q_1d Number of quadrature points in one dimension 1000 @param[in] dim Dimension of basis 1001 @param[out] mass Assembled mass matrix in one dimension 1002 @param[out] laplace Assembled perturbed Laplacian in one dimension 1003 1004 @return An error code: 0 - success, otherwise - failure 1005 1006 @ref Developer 1007 **/ 1008 CeedPragmaOptimizeOff 1009 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, 1010 const CeedScalar *grad_1d, 1011 const CeedScalar *q_weight_1d, CeedInt P_1d, 1012 CeedInt Q_1d, CeedInt dim, 1013 CeedScalar *mass, CeedScalar *laplace) { 1014 1015 for (CeedInt i=0; i<P_1d; i++) 1016 for (CeedInt j=0; j<P_1d; j++) { 1017 CeedScalar sum = 0.0; 1018 for (CeedInt k=0; k<Q_1d; k++) 1019 sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j]; 1020 mass[i+j*P_1d] = sum; 1021 } 1022 // -- Laplacian 1023 for (CeedInt i=0; i<P_1d; i++) 1024 for (CeedInt j=0; j<P_1d; j++) { 1025 CeedScalar sum = 0.0; 1026 for (CeedInt k=0; k<Q_1d; k++) 1027 sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j]; 1028 laplace[i+j*P_1d] = sum; 1029 } 1030 CeedScalar perturbation = dim>2 ? 
1e-6 : 1e-4;
  for (CeedInt i=0; i<P_1d; i++)
    laplace[i+P_1d*i] += perturbation;
  return CEED_ERROR_SUCCESS;
}
CeedPragmaOptimizeOn

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Backend API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorBackend
/// @{

/**
  @brief Create object holding CeedQFunction assembly data for CeedOperator

  @param[in] ceed   A Ceed object where the CeedQFunctionAssemblyData will be created
  @param[out] data  Address of the variable where the newly created
                      CeedQFunctionAssemblyData will be stored

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataCreate(Ceed ceed,
                                    CeedQFunctionAssemblyData *data) {
  int ierr;

  ierr = CeedCalloc(1, data); CeedChk(ierr);
  (*data)->ref_count = 1;
  // Hold a reference on the Ceed for the lifetime of this data object
  (*data)->ceed = ceed;
  ierr = CeedReference(ceed); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Increment the reference counter for a CeedQFunctionAssemblyData

  @param data  CeedQFunctionAssemblyData to increment the reference counter

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
  data->ref_count++;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set re-use of CeedQFunctionAssemblyData

  @param data        CeedQFunctionAssemblyData to mark for reuse
  @param reuse_data  Boolean flag indicating data re-use

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data,
                                      bool reuse_data) {
  data->reuse_data = reuse_data;
  // Force (re-)assembly at least once after the reuse policy changes
  data->needs_data_update = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Mark QFunctionAssemblyData as stale

  @param data               CeedQFunctionAssemblyData to mark as stale
  @param needs_data_update  Boolean flag indicating if update is needed or completed

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,
    bool needs_data_update) {
  data->needs_data_update = needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Determine if QFunctionAssemblyData needs update

  @param[in] data               CeedQFunctionAssemblyData to check
  @param[out] is_update_needed  Boolean flag indicating if re-assembly is required

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
    bool *is_update_needed) {
  // Re-assembly is required unless reuse is enabled and the data is current
  *is_update_needed = !data->reuse_data || data->needs_data_update;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
           be destroyed with `CeedQFunctionAssemblyDataDestroy()`;
         Note: If `*data_copy` is non-NULL, then it is assumed that
           `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
           CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
           the only reference to this CeedQFunctionAssemblyData.

  @param data            CeedQFunctionAssemblyData to copy reference to
  @param[out] data_copy  Variable to store copied reference

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
    CeedQFunctionAssemblyData *data_copy) {
  int ierr;

  // Reference first, then release the old target of *data_copy (if any)
  ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
  ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
  *data_copy = data;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get setup status for internal objects for CeedQFunctionAssemblyData

  @param[in] data       CeedQFunctionAssemblyData to retrieve status
  @param[out] is_setup  Boolean flag for setup status

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
                                     bool *is_setup) {
  *is_setup = data->is_setup;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Set internal objects for CeedQFunctionAssemblyData

  @param[in] data  CeedQFunctionAssemblyData to set objects
  @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
  @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector vec, CeedElemRestriction rstr) {
  int ierr;

  ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);

  data->is_setup = true;
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Get internal objects for CeedQFunctionAssemblyData. Returns copied
           references; the caller is responsible for destroying them.

  @param[in] data   CeedQFunctionAssemblyData to retrieve objects from
  @param[out] vec   Variable to store reference to assembled CeedVector
  @param[out] rstr  Variable to store reference to CeedElemRestriction

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
                                        CeedVector *vec, CeedElemRestriction *rstr) {
  int ierr;

  if (!data->is_setup)
    // LCOV_EXCL_START
    return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
                     "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
  // LCOV_EXCL_STOP

  ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
  ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Destroy CeedQFunctionAssemblyData

  @param[out] data  CeedQFunctionAssemblyData to destroy

  @return An error code: 0 - success, otherwise - failure

  @ref Backend
**/
int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
  int ierr;

  // Only free when the last reference is released
  if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;

  ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
  ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);

  ierr = CeedFree(data); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/// @}

/// ----------------------------------------------------------------------------
/// CeedOperator Public API
/// ----------------------------------------------------------------------------
/// @addtogroup CeedOperatorUser
/// @{

/**
  @brief Assemble a linear CeedQFunction associated with a CeedOperator

  This returns a CeedVector containing a matrix at each quadrature point
    providing the action of the CeedQFunction associated with the CeedOperator.
    The vector 'assembled' is of shape
      [num_elements, num_input_fields, num_output_fields, num_quad_points]
    and contains column-major matrices representing the action of the
    CeedQFunction for a corresponding quadrature point on an element. Inputs and
    outputs are in the order provided by the user when adding CeedOperator fields.
    For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
    'v', provided in that order, would result in an assembled QFunction that
    consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
    on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedQFunction at
                           quadrature points
  @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
                           CeedQFunction
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
                                        CeedElemRestriction *rstr,
                                        CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunction) {
    ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
    CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
           rstr, request); CeedChk(ierr);
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble CeedQFunction and store result internally. Return copied
           references of stored data to the caller. Caller is responsible for
           ownership and destruction of the copied references. See also
           @ref CeedOperatorLinearAssembleQFunction

  @param op         CeedOperator to assemble CeedQFunction
  @param assembled  CeedVector to store assembled CeedQFunction at
                      quadrature points
  @param rstr       CeedElemRestriction for CeedVector containing assembled
                      CeedQFunction
  @param request    Address of CeedRequest for non-blocking completion, else
                      @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    bool qf_assembled_is_setup;
    CeedVector assembled_vec = NULL;
    CeedElemRestriction assembled_rstr = NULL;

    ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
           &qf_assembled_is_setup); CeedChk(ierr);
    if (qf_assembled_is_setup) {
      // Reuse previously assembled objects, updating values only if stale
      ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
             &assembled_rstr); CeedChk(ierr);

      bool update_needed;
      ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
             &update_needed); CeedChk(ierr);
      if (update_needed) {
        ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
               request); CeedChk(ierr);
      }
    } else {
      // First assembly; build and store the objects internally
      ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
             request); CeedChk(ierr);
      ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
             assembled_rstr); CeedChk(ierr);
    }
    ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
    CeedChk(ierr);

    // Copy reference to internally held copy
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
    ierr =
CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
    ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
                                       CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleDiagonal) {
    ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddDiagonal) {
    // Backend only provides the additive version; zero first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
      CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the diagonal of a square linear CeedOperator

  This sums into a CeedVector the diagonal of a linear CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
  @param request         Address of CeedRequest for non-blocking completion, else
                           @ref CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
                                          CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleAddDiagonal) {
    ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
             request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    // false -> assemble scalar diagonal (not point block)
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           false, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This overwrites a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssemblePointBlockDiagonal) {
    ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else if (op->LinearAssembleAddPointBlockDiagonal) {
    // Backend only provides the additive version; zero first
    ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
    ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
           request); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
  ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Assemble the point block diagonal of a square linear CeedOperator

  This sums into a CeedVector with the point block diagonal of a linear
    CeedOperator.

  Note: Currently only non-composite CeedOperators with a single field and
          composite CeedOperators with single field sub-operators are supported.

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param op              CeedOperator to assemble CeedQFunction
  @param[out] assembled  CeedVector to store assembled CeedOperator point block
                           diagonal, provided in row-major form with an
                           @a num_comp * @a num_comp block at each node. The dimensions
                           of this vector are derived from the active vector
                           for the CeedOperator. The array has shape
                           [nodes, component out, component in].
  @param request         Address of CeedRequest for non-blocking completion, else
                           CEED_REQUEST_IMMEDIATE

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  CeedSize input_size = 0, output_size = 0;
  ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
  CeedChk(ierr);
  if (input_size != output_size)
    // LCOV_EXCL_START
    return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
  // LCOV_EXCL_STOP

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
const char *resource, *fallback_resource; 1619 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1620 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1621 CeedChk(ierr); 1622 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1623 // Fallback to reference Ceed 1624 if (!op->op_fallback) { 1625 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1626 } 1627 // Assemble 1628 ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, 1629 assembled, request); CeedChk(ierr); 1630 return CEED_ERROR_SUCCESS; 1631 } 1632 } 1633 1634 // Default interface implemenation 1635 bool is_composite; 1636 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1637 if (is_composite) { 1638 ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, 1639 true, assembled); CeedChk(ierr); 1640 return CEED_ERROR_SUCCESS; 1641 } else { 1642 ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); 1643 CeedChk(ierr); 1644 return CEED_ERROR_SUCCESS; 1645 } 1646 } 1647 1648 /** 1649 @brief Fully assemble the nonzero pattern of a linear operator. 1650 1651 Expected to be used in conjunction with CeedOperatorLinearAssemble() 1652 1653 The assembly routines use coordinate format, with num_entries tuples of the 1654 form (i, j, value) which indicate that value should be added to the matrix 1655 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1656 This function returns the number of entries and their (i, j) locations, 1657 while CeedOperatorLinearAssemble() provides the values in the same 1658 ordering. 1659 1660 This will generally be slow unless your operator is low-order. 1661 1662 Note: Calling this function asserts that setup is complete 1663 and sets the CeedOperator as immutable. 
1664 1665 @param[in] op CeedOperator to assemble 1666 @param[out] num_entries Number of entries in coordinate nonzero pattern 1667 @param[out] rows Row number for each entry 1668 @param[out] cols Column number for each entry 1669 1670 @ref User 1671 **/ 1672 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, 1673 CeedInt **rows, CeedInt **cols) { 1674 int ierr; 1675 CeedInt num_suboperators, single_entries; 1676 CeedOperator *sub_operators; 1677 bool is_composite; 1678 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 1679 1680 // Use backend version, if available 1681 if (op->LinearAssembleSymbolic) { 1682 ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr); 1683 return CEED_ERROR_SUCCESS; 1684 } else { 1685 // Check for valid fallback resource 1686 const char *resource, *fallback_resource; 1687 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 1688 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 1689 CeedChk(ierr); 1690 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 1691 // Fallback to reference Ceed 1692 if (!op->op_fallback) { 1693 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 1694 } 1695 // Assemble 1696 ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows, 1697 cols); CeedChk(ierr); 1698 return CEED_ERROR_SUCCESS; 1699 } 1700 } 1701 1702 // Default interface implementation 1703 1704 // count entries and allocate rows, cols arrays 1705 ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); 1706 *num_entries = 0; 1707 if (is_composite) { 1708 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1709 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1710 for (int k = 0; k < num_suboperators; ++k) { 1711 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1712 &single_entries); CeedChk(ierr); 1713 *num_entries += single_entries; 1714 } 1715 } else { 1716 ierr = 
CeedSingleOperatorAssemblyCountEntries(op, 1717 &single_entries); CeedChk(ierr); 1718 *num_entries += single_entries; 1719 } 1720 ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr); 1721 ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr); 1722 1723 // assemble nonzero locations 1724 CeedInt offset = 0; 1725 if (is_composite) { 1726 ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); 1727 ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); 1728 for (int k = 0; k < num_suboperators; ++k) { 1729 ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, 1730 *cols); CeedChk(ierr); 1731 ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], 1732 &single_entries); 1733 CeedChk(ierr); 1734 offset += single_entries; 1735 } 1736 } else { 1737 ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols); 1738 CeedChk(ierr); 1739 } 1740 1741 return CEED_ERROR_SUCCESS; 1742 } 1743 1744 /** 1745 @brief Fully assemble the nonzero entries of a linear operator. 1746 1747 Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic() 1748 1749 The assembly routines use coordinate format, with num_entries tuples of the 1750 form (i, j, value) which indicate that value should be added to the matrix 1751 in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. 1752 This function returns the values of the nonzero entries to be added, their 1753 (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic() 1754 1755 This will generally be slow unless your operator is low-order. 1756 1757 Note: Calling this function asserts that setup is complete 1758 and sets the CeedOperator as immutable. 
  @param[in] op      CeedOperator to assemble
  @param[out] values Values to assemble into matrix

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
  int ierr;
  CeedInt num_suboperators, single_entries = 0;
  CeedOperator *sub_operators;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssemble) {
    ierr = op->LinearAssemble(op, values); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);

  // Each sub-operator writes its entries at a running offset into values;
  // the ordering matches CeedOperatorLinearAssembleSymbolic()
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
      CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator, creating the prolongation basis from the
           fine and coarse grid interpolation

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // For tensor bases the 1D interpolation matrices are used; for non-tensor
  // bases the full interpolation matrices are used. Mixing is not supported.
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work copies of the interpolation matrices (interp_f is overwritten by the
  // QR factorization below, so the basis-owned data must not be used directly)
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Solve the least-squares problem interp_f * interp_c_to_f = interp_c
  // via the QR factorization of interp_f

  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c (back substitution on the
  //    upper-triangular factor stored in interp_f)
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active basis

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.

  @param[in] op_fine       Fine grid operator
  @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
  @param[in] rstr_coarse   Coarse grid restriction
  @param[in] basis_coarse  Coarse grid active vector basis
  @param[in] interp_c_to_f Matrix for coarse to fine interpolation
  @param[out] op_coarse    Coarse grid operator
  @param[out] op_prolong   Coarse to fine operator
  @param[out] op_restrict  Fine to coarse operator

  @return An error code: 0 - success, otherwise - failure

  @ref User
**/
int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
    CeedOperator *op_prolong, CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
  ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
  ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
  CeedChk(ierr);
  // Recover the 1D node count from the per-element size.
  // NOTE(review): sqrt/cbrt results are truncated on implicit conversion to
  //   CeedInt; a value of e.g. 8.999999 would round down — consider an
  //   explicit round(). TODO confirm against upstream behavior.
  P_1d_c = dim == 1 ? num_nodes_c :
           dim == 2 ? sqrt(num_nodes_c) :
           cbrt(num_nodes_c);
  // The prolongation basis only interpolates, so gradient, reference
  // coordinates, and weights are all-zero placeholders
  CeedScalar *q_ref, *q_weight, *grad;
  ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
  ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
  CeedBasis basis_c_to_f;
  ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
                                 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
  CeedChk(ierr);
  ierr = CeedFree(&q_ref); CeedChk(ierr);
  ierr = CeedFree(&q_weight); CeedChk(ierr);
  ierr = CeedFree(&grad); CeedChk(ierr);

  // Core code
  ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
                                          basis_coarse, basis_c_to_f, op_coarse,
                                          op_prolong, op_restrict);
  CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a non-tensor basis for the active vector

  Note: Calling this function asserts that setup is complete
          and sets the CeedOperator as immutable.
2004 2005 @param[in] op_fine Fine grid operator 2006 @param[in] p_mult_fine L-vector multiplicity in parallel gather/scatter 2007 @param[in] rstr_coarse Coarse grid restriction 2008 @param[in] basis_coarse Coarse grid active vector basis 2009 @param[in] interp_c_to_f Matrix for coarse to fine interpolation 2010 @param[out] op_coarse Coarse grid operator 2011 @param[out] op_prolong Coarse to fine operator 2012 @param[out] op_restrict Fine to coarse operator 2013 2014 @return An error code: 0 - success, otherwise - failure 2015 2016 @ref User 2017 **/ 2018 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, 2019 CeedVector p_mult_fine, 2020 CeedElemRestriction rstr_coarse, 2021 CeedBasis basis_coarse, 2022 const CeedScalar *interp_c_to_f, 2023 CeedOperator *op_coarse, 2024 CeedOperator *op_prolong, 2025 CeedOperator *op_restrict) { 2026 int ierr; 2027 ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); 2028 Ceed ceed; 2029 ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); 2030 2031 // Check for compatible quadrature spaces 2032 CeedBasis basis_fine; 2033 ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); 2034 CeedInt Q_f, Q_c; 2035 ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); 2036 ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); 2037 if (Q_f != Q_c) 2038 // LCOV_EXCL_START 2039 return CeedError(ceed, CEED_ERROR_DIMENSION, 2040 "Bases must have compatible quadrature spaces"); 2041 // LCOV_EXCL_STOP 2042 2043 // Coarse to fine basis 2044 CeedElemTopology topo; 2045 ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr); 2046 CeedInt dim, num_comp, num_nodes_c, num_nodes_f; 2047 ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr); 2048 ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr); 2049 ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr); 2050 ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c); 2051 
CeedChk(ierr); 2052 CeedScalar *q_ref, *q_weight, *grad; 2053 ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr); 2054 ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr); 2055 ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr); 2056 CeedBasis basis_c_to_f; 2057 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, 2058 interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f); 2059 CeedChk(ierr); 2060 ierr = CeedFree(&q_ref); CeedChk(ierr); 2061 ierr = CeedFree(&q_weight); CeedChk(ierr); 2062 ierr = CeedFree(&grad); CeedChk(ierr); 2063 2064 // Core code 2065 ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, 2066 basis_coarse, basis_c_to_f, op_coarse, 2067 op_prolong, op_restrict); 2068 CeedChk(ierr); 2069 return CEED_ERROR_SUCCESS; 2070 } 2071 2072 /** 2073 @brief Build a FDM based approximate inverse for each element for a 2074 CeedOperator 2075 2076 This returns a CeedOperator and CeedVector to apply a Fast Diagonalization 2077 Method based approximate inverse. This function obtains the simultaneous 2078 diagonalization for the 1D mass and Laplacian operators, 2079 M = V^T V, K = V^T S V. 2080 The assembled QFunction is used to modify the eigenvalues from simultaneous 2081 diagonalization and obtain an approximate inverse of the form 2082 V^T S^hat V. The CeedOperator must be linear and non-composite. The 2083 associated CeedQFunction must therefore also be linear. 2084 2085 Note: Calling this function asserts that setup is complete 2086 and sets the CeedOperator as immutable. 
2087 2088 @param op CeedOperator to create element inverses 2089 @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse 2090 for each element 2091 @param request Address of CeedRequest for non-blocking completion, else 2092 @ref CEED_REQUEST_IMMEDIATE 2093 2094 @return An error code: 0 - success, otherwise - failure 2095 2096 @ref User 2097 **/ 2098 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, 2099 CeedRequest *request) { 2100 int ierr; 2101 ierr = CeedOperatorCheckReady(op); CeedChk(ierr); 2102 2103 // Use backend version, if available 2104 if (op->CreateFDMElementInverse) { 2105 ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); 2106 return CEED_ERROR_SUCCESS; 2107 } else { 2108 // Check for valid fallback resource 2109 const char *resource, *fallback_resource; 2110 ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); 2111 ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); 2112 CeedChk(ierr); 2113 if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { 2114 // Fallback to reference Ceed 2115 if (!op->op_fallback) { 2116 ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); 2117 } 2118 // Assemble 2119 ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); 2120 CeedChk(ierr); 2121 return CEED_ERROR_SUCCESS; 2122 } 2123 } 2124 2125 // Interface implementation 2126 Ceed ceed, ceed_parent; 2127 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 2128 ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); 2129 ceed_parent = ceed_parent ? 
ceed_parent : ceed; 2130 CeedQFunction qf; 2131 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 2132 2133 // Determine active input basis 2134 bool interp = false, grad = false; 2135 CeedBasis basis = NULL; 2136 CeedElemRestriction rstr = NULL; 2137 CeedOperatorField *op_fields; 2138 CeedQFunctionField *qf_fields; 2139 CeedInt num_input_fields; 2140 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL); 2141 CeedChk(ierr); 2142 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); 2143 for (CeedInt i=0; i<num_input_fields; i++) { 2144 CeedVector vec; 2145 ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr); 2146 if (vec == CEED_VECTOR_ACTIVE) { 2147 CeedEvalMode eval_mode; 2148 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr); 2149 interp = interp || eval_mode == CEED_EVAL_INTERP; 2150 grad = grad || eval_mode == CEED_EVAL_GRAD; 2151 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr); 2152 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr); 2153 } 2154 } 2155 if (!basis) 2156 // LCOV_EXCL_START 2157 return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set"); 2158 // LCOV_EXCL_STOP 2159 CeedSize l_size = 1; 2160 CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1; 2161 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 2162 ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr); 2163 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 2164 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 2165 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 2166 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 2167 ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); 2168 ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); 2169 2170 // Build and diagonalize 1D Mass and Laplacian 2171 
bool tensor_basis; 2172 ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr); 2173 if (!tensor_basis) 2174 // LCOV_EXCL_START 2175 return CeedError(ceed, CEED_ERROR_BACKEND, 2176 "FDMElementInverse only supported for tensor " 2177 "bases"); 2178 // LCOV_EXCL_STOP 2179 CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda; 2180 ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr); 2181 ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr); 2182 ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr); 2183 ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr); 2184 ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr); 2185 // -- Build matrices 2186 const CeedScalar *interp_1d, *grad_1d, *q_weight_1d; 2187 ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr); 2188 ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr); 2189 ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr); 2190 ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, 2191 mass, laplace); CeedChk(ierr); 2192 2193 // -- Diagonalize 2194 ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d); 2195 CeedChk(ierr); 2196 ierr = CeedFree(&mass); CeedChk(ierr); 2197 ierr = CeedFree(&laplace); CeedChk(ierr); 2198 for (CeedInt i=0; i<P_1d; i++) 2199 for (CeedInt j=0; j<P_1d; j++) 2200 fdm_interp[i+j*P_1d] = x[j+i*P_1d]; 2201 ierr = CeedFree(&x); CeedChk(ierr); 2202 2203 // Assemble QFunction 2204 CeedVector assembled; 2205 CeedElemRestriction rstr_qf; 2206 ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, 2207 &rstr_qf, request); CeedChk(ierr); 2208 CeedInt layout[3]; 2209 ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr); 2210 ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr); 2211 CeedScalar max_norm = 0; 2212 ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr); 2213 2214 // Calculate element averages 2215 CeedInt num_modes = (interp?1:0) + (grad?dim:0); 2216 CeedScalar 
*elem_avg; 2217 const CeedScalar *assembled_array, *q_weight_array; 2218 CeedVector q_weight; 2219 ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr); 2220 ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 2221 CEED_VECTOR_NONE, q_weight); CeedChk(ierr); 2222 ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array); 2223 CeedChk(ierr); 2224 ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array); 2225 CeedChk(ierr); 2226 ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr); 2227 const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; 2228 for (CeedInt e=0; e<num_elem; e++) { 2229 CeedInt count = 0; 2230 for (CeedInt q=0; q<num_qpts; q++) 2231 for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++) 2232 if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) > 2233 qf_value_bound) { 2234 elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] / 2235 q_weight_array[q]; 2236 count++; 2237 } 2238 if (count) { 2239 elem_avg[e] /= count; 2240 } else { 2241 elem_avg[e] = 1.0; 2242 } 2243 } 2244 ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr); 2245 ierr = CeedVectorDestroy(&assembled); CeedChk(ierr); 2246 ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr); 2247 ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr); 2248 2249 // Build FDM diagonal 2250 CeedVector q_data; 2251 CeedScalar *q_data_array, *fdm_diagonal; 2252 ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr); 2253 const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON; 2254 for (CeedInt c=0; c<num_comp; c++) 2255 for (CeedInt n=0; n<elem_size; n++) { 2256 if (interp) 2257 fdm_diagonal[c*elem_size + n] = 1.0; 2258 if (grad) 2259 for (CeedInt d=0; d<dim; d++) { 2260 CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d; 2261 fdm_diagonal[c*elem_size + n] += lambda[i]; 2262 } 2263 if (fabs(fdm_diagonal[c*elem_size + n]) < 
fdm_diagonal_bound) 2264 fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound; 2265 } 2266 ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); 2267 CeedChk(ierr); 2268 ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); 2269 ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); 2270 CeedChk(ierr); 2271 for (CeedInt e=0; e<num_elem; e++) 2272 for (CeedInt c=0; c<num_comp; c++) 2273 for (CeedInt n=0; n<elem_size; n++) 2274 q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] * 2275 fdm_diagonal[c*elem_size + n]); 2276 ierr = CeedFree(&elem_avg); CeedChk(ierr); 2277 ierr = CeedFree(&fdm_diagonal); CeedChk(ierr); 2278 ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr); 2279 2280 // Setup FDM operator 2281 // -- Basis 2282 CeedBasis fdm_basis; 2283 CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy; 2284 ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr); 2285 ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr); 2286 ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr); 2287 ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, 2288 fdm_interp, grad_dummy, q_ref_dummy, 2289 q_weight_dummy, &fdm_basis); CeedChk(ierr); 2290 ierr = CeedFree(&fdm_interp); CeedChk(ierr); 2291 ierr = CeedFree(&grad_dummy); CeedChk(ierr); 2292 ierr = CeedFree(&q_ref_dummy); CeedChk(ierr); 2293 ierr = CeedFree(&q_weight_dummy); CeedChk(ierr); 2294 ierr = CeedFree(&lambda); CeedChk(ierr); 2295 2296 // -- Restriction 2297 CeedElemRestriction rstr_qd_i; 2298 CeedInt strides[3] = {1, elem_size, elem_size*num_comp}; 2299 ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, 2300 num_comp, num_elem*num_comp*elem_size, 2301 strides, &rstr_qd_i); CeedChk(ierr); 2302 // -- QFunction 2303 CeedQFunction qf_fdm; 2304 ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm); 2305 CeedChk(ierr); 2306 ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP); 2307 
CeedChk(ierr); 2308 ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE); 2309 CeedChk(ierr); 2310 ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP); 2311 CeedChk(ierr); 2312 ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr); 2313 // -- QFunction context 2314 CeedInt *num_comp_data; 2315 ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr); 2316 num_comp_data[0] = num_comp; 2317 CeedQFunctionContext ctx_fdm; 2318 ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr); 2319 ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, 2320 sizeof(*num_comp_data), num_comp_data); 2321 CeedChk(ierr); 2322 ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr); 2323 ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr); 2324 // -- Operator 2325 ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv); 2326 CeedChk(ierr); 2327 CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2328 CeedChk(ierr); 2329 CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, 2330 q_data); CeedChk(ierr); 2331 CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE); 2332 CeedChk(ierr); 2333 2334 // Cleanup 2335 ierr = CeedVectorDestroy(&q_data); CeedChk(ierr); 2336 ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr); 2337 ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr); 2338 ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr); 2339 2340 return CEED_ERROR_SUCCESS; 2341 } 2342 2343 /// @} 2344