1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stdint.h> 21 #include <string.h> 22 #include "ceed-opt.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 28 bool is_input, const CeedInt blk_size, 29 CeedElemRestriction *blk_restr, 30 CeedVector *e_vecs_full, CeedVector *e_vecs, 31 CeedVector *q_vecs, CeedInt start_e, 32 CeedInt num_fields, CeedInt Q) { 33 CeedInt ierr, num_comp, size, P; 34 Ceed ceed; 35 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 36 CeedBasis basis; 37 CeedElemRestriction r; 38 CeedOperatorField *op_fields; 39 CeedQFunctionField *qf_fields; 40 if (is_input) { 41 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); 42 CeedChkBackend(ierr); 43 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); 44 CeedChkBackend(ierr); 45 } else { 46 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields); 47 CeedChkBackend(ierr); 48 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 49 CeedChkBackend(ierr); 50 } 51 52 // Loop over fields 53 for (CeedInt i=0; i<num_fields; i++) { 54 CeedEvalMode eval_mode; 55 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 56 CeedChkBackend(ierr); 57 58 if (eval_mode != CEED_EVAL_WEIGHT) { 59 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 60 CeedChkBackend(ierr); 61 Ceed ceed; 62 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 63 CeedSize l_size; 64 CeedInt num_elem, elem_size, comp_stride; 65 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 66 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 67 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 68 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 69 70 bool strided; 71 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 72 if (strided) { 73 CeedInt strides[3]; 74 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 75 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 76 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 77 CeedChkBackend(ierr); 78 } else { 79 const CeedInt *offsets = NULL; 80 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 81 CeedChkBackend(ierr); 82 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 83 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 84 blk_size, num_comp, comp_stride, 85 l_size, CEED_MEM_HOST, 86 CEED_COPY_VALUES, offsets, 87 &blk_restr[i+start_e]); 88 CeedChkBackend(ierr); 89 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 90 } 91 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 92 &e_vecs_full[i+start_e]); 93 CeedChkBackend(ierr); 94 } 95 96 switch(eval_mode) { 97 case CEED_EVAL_NONE: 98 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 99 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]); 100 CeedChkBackend(ierr); 101 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 102 CeedChkBackend(ierr); 103 break; 104 case CEED_EVAL_INTERP: 105 case CEED_EVAL_GRAD: 106 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 107 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 108 ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr); 109 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr); 110 ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]); 111 CeedChkBackend(ierr); 112 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 113 CeedChkBackend(ierr); 114 break; 115 case CEED_EVAL_WEIGHT: // Only on input fields 116 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 117 ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr); 118 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 119 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 120 CeedChkBackend(ierr); 121 122 break; 123 case CEED_EVAL_DIV: 124 break; // Not implemented 125 case CEED_EVAL_CURL: 126 break; // Not implemented 127 } 128 if (is_input && !!e_vecs[i]) { 129 ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST, 130 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr); 131 } 132 } 133 return CEED_ERROR_SUCCESS; 134 } 135 136 //------------------------------------------------------------------------------ 137 // Setup Operator 138 //------------------------------------------------------------------------------ 139 static int CeedOperatorSetup_Opt(CeedOperator op) { 140 int ierr; 141 bool is_setup_done; 142 ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr); 143 if (is_setup_done) return CEED_ERROR_SUCCESS; 144 Ceed ceed; 145 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 146 Ceed_Opt *ceed_impl; 147 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 148 const CeedInt blk_size = ceed_impl->blk_size; 149 CeedOperator_Opt *impl; 150 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 151 CeedQFunction qf; 152 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 153 CeedInt Q, num_input_fields, num_output_fields; 154 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 155 ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr); 156 CeedOperatorField *op_input_fields, *op_output_fields; 157 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 158 &num_output_fields, &op_output_fields); 159 CeedChkBackend(ierr); 160 CeedQFunctionField *qf_input_fields, *qf_output_fields; 161 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 162 &qf_output_fields); 163 CeedChkBackend(ierr); 164 165 // Allocate 166 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 167 CeedChkBackend(ierr); 168 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full); 169 CeedChkBackend(ierr); 170 171 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr); 172 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr); 173 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr); 174 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr); 175 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr); 176 177 impl->num_inputs = num_input_fields; 178 impl->num_outputs = num_output_fields; 179 180 // Set up infield and outfield pointer arrays 181 // Infields 182 ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr, 183 impl->e_vecs_full, impl->e_vecs_in, 184 impl->q_vecs_in, 0, num_input_fields, Q); 185 CeedChkBackend(ierr); 186 // Outfields 187 ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr, 188 impl->e_vecs_full, impl->e_vecs_out, 189 impl->q_vecs_out, num_input_fields, 190 num_output_fields, Q); 191 CeedChkBackend(ierr); 192 193 // Identity QFunctions 194 if (impl->is_identity_qf) { 195 CeedEvalMode in_mode, out_mode; 196 CeedQFunctionField *in_fields, *out_fields; 197 ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields); 198 CeedChkBackend(ierr); 199 ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode); 200 CeedChkBackend(ierr); 201 ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode); 202 CeedChkBackend(ierr); 203 204 if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) { 205 impl->is_identity_restr_op = true; 206 } else { 207 ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr); 208 impl->q_vecs_out[0] = impl->q_vecs_in[0]; 209 ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr); 210 } 211 } 212 213 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 214 215 return CEED_ERROR_SUCCESS; 216 } 217 218 //------------------------------------------------------------------------------ 219 // Setup Input Fields 220 //------------------------------------------------------------------------------ 221 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, 222 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 223 CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl, 224 CeedRequest *request) { 225 CeedInt ierr; 226 CeedEvalMode eval_mode; 227 CeedVector vec; 228 uint64_t state; 229 230 for (CeedInt i=0; i<num_input_fields; i++) { 231 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 232 CeedChkBackend(ierr); 233 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 234 } else { 235 // Get input vector 236 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 237 CeedChkBackend(ierr); 238 if (vec != CEED_VECTOR_ACTIVE) { 239 // Restrict 240 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 241 if (state != impl->input_states[i]) { 242 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 243 vec, impl->e_vecs_full[i], request); 244 CeedChkBackend(ierr); 245 impl->input_states[i] = state; 246 } 247 // Get evec 248 ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, 249 (const CeedScalar **) &e_data[i]); 250 CeedChkBackend(ierr); 251 } else { 252 // Set Qvec for CEED_EVAL_NONE 253 if (eval_mode == CEED_EVAL_NONE) { 254 ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST, 255 (const CeedScalar **)&e_data[i]); 256 CeedChkBackend(ierr); 257 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 258 CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr); 259 ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i], 260 (const CeedScalar **)&e_data[i]); 261 CeedChkBackend(ierr); 262 } 263 } 264 } 265 } 266 return CEED_ERROR_SUCCESS; 267 } 268 269 //------------------------------------------------------------------------------ 270 // Input Basis Action 271 //------------------------------------------------------------------------------ 272 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 273 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 274 CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active, 275 CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl, 276 CeedRequest *request) { 277 CeedInt ierr; 278 CeedInt dim, elem_size, size; 279 CeedElemRestriction elem_restr; 280 CeedEvalMode eval_mode; 281 CeedBasis basis; 282 CeedVector vec; 283 284 for (CeedInt i=0; i<num_input_fields; i++) { 285 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 286 CeedChkBackend(ierr); 287 // Skip active input 288 if (skip_active) { 289 if (vec == CEED_VECTOR_ACTIVE) 290 continue; 291 } 292 293 CeedInt active_in = 0; 294 // Get elem_size, eval_mode, size 295 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 296 CeedChkBackend(ierr); 297 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 298 CeedChkBackend(ierr); 299 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 300 CeedChkBackend(ierr); 301 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 302 CeedChkBackend(ierr); 303 // Restrict block active input 304 if (vec == CEED_VECTOR_ACTIVE) { 305 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size, 306 CEED_NOTRANSPOSE, in_vec, 307 impl->e_vecs_in[i], request); 308 CeedChkBackend(ierr); 309 active_in = 1; 310 } 311 // Basis action 312 switch(eval_mode) { 313 case CEED_EVAL_NONE: 314 if (!active_in) { 315 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 316 CEED_USE_POINTER, &e_data[i][e*Q*size]); 317 CeedChkBackend(ierr); 318 } 319 break; 320 case CEED_EVAL_INTERP: 321 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 322 CeedChkBackend(ierr); 323 if (!active_in) { 324 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 325 CEED_USE_POINTER, &e_data[i][e*elem_size*size]); 326 CeedChkBackend(ierr); 327 } 328 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 329 CEED_EVAL_INTERP, impl->e_vecs_in[i], 330 impl->q_vecs_in[i]); CeedChkBackend(ierr); 331 break; 332 case CEED_EVAL_GRAD: 333 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 334 CeedChkBackend(ierr); 335 if (!active_in) { 336 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 337 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 338 CEED_USE_POINTER, 339 &e_data[i][e*elem_size*size/dim]); 340 CeedChkBackend(ierr); 341 } 342 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 343 CEED_EVAL_GRAD, impl->e_vecs_in[i], 344 impl->q_vecs_in[i]); CeedChkBackend(ierr); 345 break; 346 case CEED_EVAL_WEIGHT: 347 break; // No action 348 // LCOV_EXCL_START 349 case CEED_EVAL_DIV: 350 case CEED_EVAL_CURL: { 351 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 352 CeedChkBackend(ierr); 353 Ceed ceed; 354 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 355 return CeedError(ceed, CEED_ERROR_BACKEND, 356 "Ceed evaluation mode not implemented"); 357 // LCOV_EXCL_STOP 358 } 359 } 360 } 361 return CEED_ERROR_SUCCESS; 362 } 363 364 //------------------------------------------------------------------------------ 365 // Output Basis Action 366 //------------------------------------------------------------------------------ 367 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 368 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 369 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 370 CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl, 371 CeedRequest *request) { 372 CeedInt ierr; 373 CeedElemRestriction elem_restr; 374 CeedEvalMode eval_mode; 375 CeedBasis basis; 376 CeedVector vec; 377 378 for (CeedInt i=0; i<num_output_fields; i++) { 379 // Get elem_size, eval_mode, size 380 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 381 CeedChkBackend(ierr); 382 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 383 CeedChkBackend(ierr); 384 // Basis action 385 switch(eval_mode) { 386 case CEED_EVAL_NONE: 387 break; // No action 388 case CEED_EVAL_INTERP: 389 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 390 CeedChkBackend(ierr); 391 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 392 CEED_EVAL_INTERP, impl->q_vecs_out[i], 393 impl->e_vecs_out[i]); CeedChkBackend(ierr); 394 break; 395 case CEED_EVAL_GRAD: 396 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 397 CeedChkBackend(ierr); 398 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 399 CEED_EVAL_GRAD, impl->q_vecs_out[i], 400 impl->e_vecs_out[i]); CeedChkBackend(ierr); 401 break; 402 // LCOV_EXCL_START 403 case CEED_EVAL_WEIGHT: { 404 Ceed ceed; 405 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 406 return CeedError(ceed, CEED_ERROR_BACKEND, 407 "CEED_EVAL_WEIGHT cannot be an output " 408 "evaluation mode"); 409 } 410 case CEED_EVAL_DIV: 411 case CEED_EVAL_CURL: { 412 Ceed ceed; 413 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 414 return CeedError(ceed, CEED_ERROR_BACKEND, 415 "Ceed evaluation mode not implemented"); 416 // LCOV_EXCL_STOP 417 } 418 } 419 // Restrict output block 420 // Get output vector 421 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 422 CeedChkBackend(ierr); 423 if (vec == CEED_VECTOR_ACTIVE) 424 vec = out_vec; 425 // Restrict 426 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs], 427 e/blk_size, CEED_TRANSPOSE, 428 impl->e_vecs_out[i], vec, request); 429 CeedChkBackend(ierr); 430 } 431 return CEED_ERROR_SUCCESS; 432 } 433 434 //------------------------------------------------------------------------------ 435 // Restore Input Vectors 436 //------------------------------------------------------------------------------ 437 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields, 438 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 439 CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) { 440 CeedInt ierr; 441 442 for (CeedInt i=0; i<num_input_fields; i++) { 443 CeedEvalMode eval_mode; 444 CeedVector vec; 445 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 446 CeedChkBackend(ierr); 447 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 448 CeedChkBackend(ierr); 449 if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) { 450 ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i], 451 (const CeedScalar **) &e_data[i]); 452 CeedChkBackend(ierr); 453 } 454 } 455 return CEED_ERROR_SUCCESS; 456 } 457 458 //------------------------------------------------------------------------------ 459 // Operator Apply 460 //------------------------------------------------------------------------------ 461 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec, 462 CeedVector out_vec, CeedRequest *request) { 463 int ierr; 464 Ceed ceed; 465 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 466 Ceed_Opt *ceed_impl; 467 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 468 CeedInt blk_size = ceed_impl->blk_size; 469 CeedOperator_Opt *impl; 470 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 471 CeedInt Q, num_input_fields, num_output_fields, num_elem; 472 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 473 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 474 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 475 CeedQFunction qf; 476 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 477 CeedOperatorField *op_input_fields, *op_output_fields; 478 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 479 &num_output_fields, &op_output_fields); 480 CeedChkBackend(ierr); 481 CeedQFunctionField *qf_input_fields, *qf_output_fields; 482 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 483 &qf_output_fields); 484 CeedChkBackend(ierr); 485 CeedEvalMode eval_mode; 486 CeedScalar *e_data[2*CEED_FIELD_MAX] = {0}; 487 488 // Setup 489 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 490 491 // Restriction only operator 492 if (impl->is_identity_restr_op) { 493 for (CeedInt b=0; b<num_blks; b++) { 494 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE, 495 in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr); 496 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE, 497 impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr); 498 } 499 return CEED_ERROR_SUCCESS; 500 } 501 502 // Input Evecs and Restriction 503 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 504 op_input_fields, in_vec, e_data, 505 impl, request); CeedChkBackend(ierr); 506 507 // Output Lvecs, Evecs, and Qvecs 508 for (CeedInt i=0; i<num_output_fields; i++) { 509 // Set Qvec if needed 510 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 511 CeedChkBackend(ierr); 512 if (eval_mode == CEED_EVAL_NONE) { 513 // Set qvec to single block evec 514 ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST, 515 &e_data[i + num_input_fields]); 516 CeedChkBackend(ierr); 517 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 518 CEED_USE_POINTER, e_data[i + num_input_fields]); 519 CeedChkBackend(ierr); 520 ierr = CeedVectorRestoreArray(impl->e_vecs_out[i], 521 &e_data[i + num_input_fields]); 522 CeedChkBackend(ierr); 523 } 524 } 525 526 // Loop through elements 527 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 528 // Input basis apply 529 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 530 num_input_fields, blk_size, in_vec, false, 531 e_data, impl, request); CeedChkBackend(ierr); 532 533 // Q function 534 if (!impl->is_identity_qf) { 535 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 536 CeedChkBackend(ierr); 537 } 538 539 // Output basis apply and restrict 540 ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields, 541 blk_size, num_input_fields, num_output_fields, 542 op, out_vec, impl, request); 543 CeedChkBackend(ierr); 544 } 545 546 // Restore input arrays 547 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 548 op_input_fields, e_data, impl); 549 CeedChkBackend(ierr); 550 551 return CEED_ERROR_SUCCESS; 552 } 553 554 //------------------------------------------------------------------------------ 555 // Core code for linear QFunction assembly 556 //------------------------------------------------------------------------------ 557 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, 558 bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, 559 CeedRequest *request) { 560 int ierr; 561 Ceed ceed; 562 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 563 Ceed_Opt *ceed_impl; 564 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 565 const CeedInt blk_size = ceed_impl->blk_size; 566 CeedOperator_Opt *impl; 567 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 568 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 569 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 570 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 571 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 572 CeedQFunction qf; 573 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 574 CeedOperatorField *op_input_fields, *op_output_fields; 575 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 576 &num_output_fields, &op_output_fields); 577 CeedChkBackend(ierr); 578 CeedQFunctionField *qf_input_fields, *qf_output_fields; 579 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 580 &qf_output_fields); 581 CeedChkBackend(ierr); 582 CeedVector vec, l_vec = impl->qf_l_vec; 583 CeedInt num_active_in = impl->num_active_in, 584 num_active_out = impl->num_active_out; 585 CeedVector *active_in = impl->qf_active_in; 586 CeedScalar *a, *tmp; 587 CeedScalar *e_data[2*CEED_FIELD_MAX] = {0}; 588 589 // Setup 590 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 591 592 // Check for identity 593 if (impl->is_identity_qf) 594 // LCOV_EXCL_START 595 return CeedError(ceed, CEED_ERROR_BACKEND, 596 "Assembling identity qfunctions not supported"); 597 // LCOV_EXCL_STOP 598 599 // Input Evecs and Restriction 600 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 601 op_input_fields, NULL, e_data, 602 impl, request); CeedChkBackend(ierr); 603 604 // Count number of active input fields 605 if (!num_active_in) { 606 for (CeedInt i=0; i<num_input_fields; i++) { 607 // Get input vector 608 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 609 CeedChkBackend(ierr); 610 // Check if active input 611 if (vec == CEED_VECTOR_ACTIVE) { 612 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 613 CeedChkBackend(ierr); 614 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 615 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 616 CeedChkBackend(ierr); 617 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 618 for (CeedInt field=0; field<size; field++) { 619 ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]); 620 CeedChkBackend(ierr); 621 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 622 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 623 CeedChkBackend(ierr); 624 } 625 num_active_in += size; 626 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 627 } 628 } 629 impl->num_active_in = num_active_in; 630 impl->qf_active_in = active_in; 631 } 632 633 // Count number of active output fields 634 if (!num_active_out) { 635 for (CeedInt i=0; i<num_output_fields; i++) { 636 // Get output vector 637 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 638 CeedChkBackend(ierr); 639 // Check if active output 640 if (vec == CEED_VECTOR_ACTIVE) { 641 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 642 CeedChkBackend(ierr); 643 num_active_out += size; 644 } 645 } 646 impl->num_active_out = num_active_out; 647 } 648 649 // Check sizes 650 if (!num_active_in || !num_active_out) 651 // LCOV_EXCL_START 652 return CeedError(ceed, CEED_ERROR_BACKEND, 653 "Cannot assemble QFunction without active inputs " 654 "and outputs"); 655 // LCOV_EXCL_STOP 656 657 // Setup l_vec 658 if (!l_vec) { 659 ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, 660 &l_vec); CeedChkBackend(ierr); 661 ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr); 662 impl->qf_l_vec = l_vec; 663 } 664 ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 665 666 // Build objects if needed 667 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 668 if (build_objects) { 669 // Create output restriction 670 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 671 num_active_in*num_active_out, 672 num_active_in*num_active_out*num_elem*Q, 673 strides, rstr); CeedChkBackend(ierr); 674 // Create assembled vector 675 ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out, 676 assembled); CeedChkBackend(ierr); 677 } 678 679 // Loop through elements 680 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 681 // Input basis apply 682 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 683 num_input_fields, blk_size, NULL, true, 684 e_data, impl, request); CeedChkBackend(ierr); 685 686 // Assemble QFunction 687 for (CeedInt in=0; in<num_active_in; in++) { 688 // Set Inputs 689 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 690 if (num_active_in > 1) { 691 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 692 0.0); CeedChkBackend(ierr); 693 } 694 // Set Outputs 695 for (CeedInt out=0; out<num_output_fields; out++) { 696 // Get output vector 697 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 698 CeedChkBackend(ierr); 699 // Check if active output 700 if (vec == CEED_VECTOR_ACTIVE) { 701 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 702 CEED_USE_POINTER, a); CeedChkBackend(ierr); 703 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 704 CeedChkBackend(ierr); 705 a += size*Q*blk_size; // Advance the pointer by the size of the output 706 } 707 } 708 // Apply QFunction 709 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 710 CeedChkBackend(ierr); 711 } 712 } 713 714 // Un-set output Qvecs to prevent accidental overwrite of Assembled 715 for (CeedInt out=0; out<num_output_fields; out++) { 716 // Get output vector 717 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 718 CeedChkBackend(ierr); 719 // Check if active output 720 if (vec == CEED_VECTOR_ACTIVE) { 721 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 722 NULL); CeedChkBackend(ierr); 723 } 724 } 725 726 // Restore input arrays 727 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 728 op_input_fields, e_data, impl); 729 CeedChkBackend(ierr); 730 731 // Output blocked restriction 732 ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr); 733 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 734 CeedElemRestriction blk_rstr = impl->qf_blk_rstr; 735 if (!blk_rstr) { 736 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 737 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 738 strides, &blk_rstr); CeedChkBackend(ierr); 739 impl->qf_blk_rstr = blk_rstr; 740 } 741 ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled, 742 request); CeedChkBackend(ierr); 743 744 return CEED_ERROR_SUCCESS; 745 } 746 747 //------------------------------------------------------------------------------ 748 // Assemble Linear QFunction 749 //------------------------------------------------------------------------------ 750 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op, 751 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 752 return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr, 753 request); 754 } 755 756 //------------------------------------------------------------------------------ 757 // Update Assembled Linear QFunction 758 //------------------------------------------------------------------------------ 759 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op, 760 CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 761 return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled, 762 &rstr, request); 763 } 764 765 //------------------------------------------------------------------------------ 766 // Operator Destroy 767 //------------------------------------------------------------------------------ 768 static int CeedOperatorDestroy_Opt(CeedOperator op) { 769 int ierr; 770 CeedOperator_Opt *impl; 771 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 772 773 for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) { 774 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 775 ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr); 776 } 777 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 778 ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr); 779 ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr); 780 781 for (CeedInt i=0; i<impl->num_inputs; i++) { 782 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 783 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 784 } 785 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 786 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 787 788 for (CeedInt i=0; i<impl->num_outputs; i++) { 789 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 790 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 791 } 792 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 793 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 794 795 // QFunction assembly data 796 for (CeedInt i=0; i<impl->num_active_in; i++) { 797 ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr); 798 } 799 ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr); 800 ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr); 801 ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr); 802 803 ierr = CeedFree(&impl); CeedChkBackend(ierr); 804 return CEED_ERROR_SUCCESS; 805 } 806 807 //------------------------------------------------------------------------------ 808 // Operator Create 809 //------------------------------------------------------------------------------ 810 int CeedOperatorCreate_Opt(CeedOperator op) { 811 int ierr; 812 Ceed ceed; 813 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 814 Ceed_Opt *ceed_impl; 815 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 816 CeedInt blk_size = ceed_impl->blk_size; 817 CeedOperator_Opt *impl; 818 819 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 820 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 821 822 if (blk_size != 1 && blk_size != 8) 823 // LCOV_EXCL_START 824 return CeedError(ceed, CEED_ERROR_BACKEND, 825 "Opt backend cannot use blocksize: %d", blk_size); 826 // LCOV_EXCL_STOP 827 828 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 829 CeedOperatorLinearAssembleQFunction_Opt); 830 CeedChkBackend(ierr); 831 ierr = CeedSetBackendFunction(ceed, "Operator", op, 832 "LinearAssembleQFunctionUpdate", 833 CeedOperatorLinearAssembleQFunctionUpdate_Opt); 834 CeedChkBackend(ierr); 835 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 836 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr); 837 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 838 CeedOperatorDestroy_Opt); CeedChkBackend(ierr); 839 return CEED_ERROR_SUCCESS; 840 } 841 //------------------------------------------------------------------------------ 842