1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stdint.h> 21 #include <string.h> 22 #include "ceed-opt.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 28 bool inOrOut, const CeedInt blk_size, 29 CeedElemRestriction *blk_restr, 30 CeedVector *full_evecs, CeedVector *e_vecs, 31 CeedVector *q_vecs, CeedInt start_e, 32 CeedInt num_fields, CeedInt Q) { 33 CeedInt dim, ierr, num_comp, size, P; 34 Ceed ceed; 35 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 36 CeedBasis basis; 37 CeedElemRestriction r; 38 CeedOperatorField *op_fields; 39 CeedQFunctionField *qf_fields; 40 if (inOrOut) { 41 ierr = CeedOperatorGetFields(op, NULL, &op_fields); 42 CeedChkBackend(ierr); 43 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); 44 CeedChkBackend(ierr); 45 } else { 46 ierr = CeedOperatorGetFields(op, &op_fields, NULL); 47 CeedChkBackend(ierr); 48 ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); 49 CeedChkBackend(ierr); 50 } 51 52 // Loop over fields 53 for (CeedInt i=0; i<num_fields; i++) { 54 CeedEvalMode eval_mode; 55 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 56 CeedChkBackend(ierr); 57 58 if (eval_mode != CEED_EVAL_WEIGHT) { 59 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 60 CeedChkBackend(ierr); 61 Ceed ceed; 62 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 63 CeedInt num_elem, elem_size, l_size, comp_stride; 64 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 65 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 66 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 67 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 68 69 bool strided; 70 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 71 if (strided) { 72 CeedInt strides[3]; 73 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 74 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 75 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 76 CeedChkBackend(ierr); 77 } else { 78 const CeedInt *offsets = NULL; 79 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 80 CeedChkBackend(ierr); 81 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 82 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 83 blk_size, num_comp, comp_stride, 84 l_size, CEED_MEM_HOST, 85 CEED_COPY_VALUES, offsets, 86 &blk_restr[i+start_e]); 87 CeedChkBackend(ierr); 88 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 89 } 90 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 91 &full_evecs[i+start_e]); 92 CeedChkBackend(ierr); 93 } 94 95 switch(eval_mode) { 96 case CEED_EVAL_NONE: 97 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 98 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]); 99 CeedChkBackend(ierr); 100 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 101 CeedChkBackend(ierr); 102 break; 103 case CEED_EVAL_INTERP: 104 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 105 ierr = CeedElemRestrictionGetElementSize(r, &P); 106 CeedChkBackend(ierr); 107 ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]); 108 CeedChkBackend(ierr); 109 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 110 CeedChkBackend(ierr); 111 break; 112 case CEED_EVAL_GRAD: 113 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 114 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 115 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 116 ierr = CeedElemRestrictionGetElementSize(r, &P); 117 CeedChkBackend(ierr); 118 ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]); 119 CeedChkBackend(ierr); 120 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 121 CeedChkBackend(ierr); 122 break; 123 case CEED_EVAL_WEIGHT: // Only on input fields 124 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 125 ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr); 126 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 127 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 128 CeedChkBackend(ierr); 129 130 break; 131 case CEED_EVAL_DIV: 132 break; // Not implemented 133 case CEED_EVAL_CURL: 134 break; // Not implemented 135 } 136 } 137 return CEED_ERROR_SUCCESS; 138 } 139 140 //------------------------------------------------------------------------------ 141 // Setup Operator 142 //------------------------------------------------------------------------------ 143 static int CeedOperatorSetup_Opt(CeedOperator op) { 144 int ierr; 145 bool setup_done; 146 ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr); 147 if (setup_done) return CEED_ERROR_SUCCESS; 148 Ceed ceed; 149 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 150 Ceed_Opt *ceed_impl; 151 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 152 const CeedInt blk_size = ceed_impl->blk_size; 153 CeedOperator_Opt *impl; 154 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 155 CeedQFunction qf; 156 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 157 CeedInt Q, num_input_fields, num_output_fields; 158 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 159 ierr = CeedQFunctionIsIdentity(qf, &impl->identity_qf); CeedChkBackend(ierr); 160 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 161 CeedChkBackend(ierr); 162 CeedOperatorField *op_input_fields, *op_output_fields; 163 ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields); 164 CeedChkBackend(ierr); 165 CeedQFunctionField *qf_input_fields, *qf_output_fields; 166 ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields); 167 CeedChkBackend(ierr); 168 169 // Allocate 170 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 171 CeedChkBackend(ierr); 172 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs); 173 CeedChkBackend(ierr); 174 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data); 175 CeedChkBackend(ierr); 176 177 ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr); 178 ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr); 179 ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr); 180 ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr); 181 ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr); 182 183 impl->num_e_vecs_in = num_input_fields; 184 impl->num_e_vecs_out = num_output_fields; 185 186 // Set up infield and outfield pointer arrays 187 // Infields 188 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blk_size, impl->blk_restr, 189 impl->e_vecs, impl->e_vecs_in, 190 impl->q_vecs_in, 0, num_input_fields, Q); 191 CeedChkBackend(ierr); 192 // Outfields 193 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blk_size, impl->blk_restr, 194 impl->e_vecs, impl->e_vecs_out, 195 impl->q_vecs_out, num_input_fields, 196 num_output_fields, Q); 197 CeedChkBackend(ierr); 198 199 // Identity QFunctions 200 if (impl->identity_qf) { 201 CeedEvalMode in_mode, out_mode; 202 CeedQFunctionField *in_fields, *out_fields; 203 ierr = CeedQFunctionGetFields(qf, &in_fields, &out_fields); 204 CeedChkBackend(ierr); 205 206 for (CeedInt i=0; i<num_input_fields; i++) { 207 ierr = CeedQFunctionFieldGetEvalMode(in_fields[i], &in_mode); 208 CeedChkBackend(ierr); 209 ierr = CeedQFunctionFieldGetEvalMode(out_fields[i], &out_mode); 210 CeedChkBackend(ierr); 211 212 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 213 impl->q_vecs_out[i] = impl->q_vecs_in[i]; 214 ierr = CeedVectorAddReference(impl->q_vecs_in[i]); CeedChkBackend(ierr); 215 } 216 } 217 218 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 219 220 return CEED_ERROR_SUCCESS; 221 } 222 223 //------------------------------------------------------------------------------ 224 // Setup Input Fields 225 //------------------------------------------------------------------------------ 226 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, 227 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 228 CeedVector in_vec, CeedOperator_Opt *impl, CeedRequest *request) { 229 CeedInt ierr; 230 CeedEvalMode eval_mode; 231 CeedVector vec; 232 uint64_t state; 233 234 for (CeedInt i=0; i<num_input_fields; i++) { 235 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 236 CeedChkBackend(ierr); 237 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 238 } else { 239 // Get input vector 240 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 241 CeedChkBackend(ierr); 242 if (vec != CEED_VECTOR_ACTIVE) { 243 // Restrict 244 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 245 if (state != impl->input_state[i]) { 246 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 247 vec, impl->e_vecs[i], request); 248 CeedChkBackend(ierr); 249 impl->input_state[i] = state; 250 } 251 } else { 252 // Set Qvec for CEED_EVAL_NONE 253 if (eval_mode == CEED_EVAL_NONE) { 254 ierr = CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 255 &impl->e_data[i]); CeedChkBackend(ierr); 256 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 257 CEED_USE_POINTER, 258 impl->e_data[i]); CeedChkBackend(ierr); 259 ierr = CeedVectorRestoreArray(impl->e_vecs_in[i], 260 &impl->e_data[i]); CeedChkBackend(ierr); 261 } 262 } 263 // Get evec 264 ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST, 265 (const CeedScalar **) &impl->e_data[i]); 266 CeedChkBackend(ierr); 267 } 268 } 269 return CEED_ERROR_SUCCESS; 270 } 271 272 //------------------------------------------------------------------------------ 273 // Input Basis Action 274 //------------------------------------------------------------------------------ 275 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 276 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 277 CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active, 278 CeedOperator_Opt *impl, CeedRequest *request) { 279 CeedInt ierr; 280 CeedInt dim, elem_size, size; 281 CeedElemRestriction elem_restr; 282 CeedEvalMode eval_mode; 283 CeedBasis basis; 284 CeedVector vec; 285 286 for (CeedInt i=0; i<num_input_fields; i++) { 287 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 288 CeedChkBackend(ierr); 289 // Skip active input 290 if (skip_active) { 291 if (vec == CEED_VECTOR_ACTIVE) 292 continue; 293 } 294 295 CeedInt active_in = 0; 296 // Get elem_size, eval_mode, size 297 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 298 CeedChkBackend(ierr); 299 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 300 CeedChkBackend(ierr); 301 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 302 CeedChkBackend(ierr); 303 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 304 CeedChkBackend(ierr); 305 // Restrict block active input 306 if (vec == CEED_VECTOR_ACTIVE) { 307 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size, 308 CEED_NOTRANSPOSE, in_vec, 309 impl->e_vecs_in[i], request); 310 CeedChkBackend(ierr); 311 active_in = 1; 312 } 313 // Basis action 314 switch(eval_mode) { 315 case CEED_EVAL_NONE: 316 if (!active_in) { 317 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 318 CEED_USE_POINTER, 319 &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr); 320 } 321 break; 322 case CEED_EVAL_INTERP: 323 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 324 CeedChkBackend(ierr); 325 if (!active_in) { 326 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 327 CEED_USE_POINTER, 328 &impl->e_data[i][e*elem_size*size]); 329 CeedChkBackend(ierr); 330 } 331 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 332 CEED_EVAL_INTERP, impl->e_vecs_in[i], 333 impl->q_vecs_in[i]); CeedChkBackend(ierr); 334 break; 335 case CEED_EVAL_GRAD: 336 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 337 CeedChkBackend(ierr); 338 if (!active_in) { 339 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 340 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 341 CEED_USE_POINTER, 342 &impl->e_data[i][e*elem_size*size/dim]); 343 CeedChkBackend(ierr); 344 } 345 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 346 CEED_EVAL_GRAD, impl->e_vecs_in[i], 347 impl->q_vecs_in[i]); CeedChkBackend(ierr); 348 break; 349 case CEED_EVAL_WEIGHT: 350 break; // No action 351 // LCOV_EXCL_START 352 case CEED_EVAL_DIV: 353 case CEED_EVAL_CURL: { 354 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 355 CeedChkBackend(ierr); 356 Ceed ceed; 357 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 358 return CeedError(ceed, CEED_ERROR_BACKEND, 359 "Ceed evaluation mode not implemented"); 360 // LCOV_EXCL_STOP 361 } 362 } 363 } 364 return CEED_ERROR_SUCCESS; 365 } 366 367 //------------------------------------------------------------------------------ 368 // Output Basis Action 369 //------------------------------------------------------------------------------ 370 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 371 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 372 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 373 CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl, 374 CeedRequest *request) { 375 CeedInt ierr; 376 CeedElemRestriction elem_restr; 377 CeedEvalMode eval_mode; 378 CeedBasis basis; 379 CeedVector vec; 380 381 for (CeedInt i=0; i<num_output_fields; i++) { 382 // Get elem_size, eval_mode, size 383 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 384 CeedChkBackend(ierr); 385 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 386 CeedChkBackend(ierr); 387 // Basis action 388 switch(eval_mode) { 389 case CEED_EVAL_NONE: 390 break; // No action 391 case CEED_EVAL_INTERP: 392 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 393 CeedChkBackend(ierr); 394 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 395 CEED_EVAL_INTERP, impl->q_vecs_out[i], 396 impl->e_vecs_out[i]); CeedChkBackend(ierr); 397 break; 398 case CEED_EVAL_GRAD: 399 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 400 CeedChkBackend(ierr); 401 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 402 CEED_EVAL_GRAD, impl->q_vecs_out[i], 403 impl->e_vecs_out[i]); CeedChkBackend(ierr); 404 break; 405 // LCOV_EXCL_START 406 case CEED_EVAL_WEIGHT: { 407 Ceed ceed; 408 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 409 return CeedError(ceed, CEED_ERROR_BACKEND, 410 "CEED_EVAL_WEIGHT cannot be an output " 411 "evaluation mode"); 412 } 413 case CEED_EVAL_DIV: 414 case CEED_EVAL_CURL: { 415 Ceed ceed; 416 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 417 return CeedError(ceed, CEED_ERROR_BACKEND, 418 "Ceed evaluation mode not implemented"); 419 // LCOV_EXCL_STOP 420 } 421 } 422 // Restrict output block 423 // Get output vector 424 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 425 CeedChkBackend(ierr); 426 if (vec == CEED_VECTOR_ACTIVE) 427 vec = out_vec; 428 // Restrict 429 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_e_vecs_in], 430 e/blk_size, CEED_TRANSPOSE, 431 impl->e_vecs_out[i], vec, request); 432 CeedChkBackend(ierr); 433 } 434 return CEED_ERROR_SUCCESS; 435 } 436 437 //------------------------------------------------------------------------------ 438 // Restore Input Vectors 439 //------------------------------------------------------------------------------ 440 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields, 441 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 442 CeedOperator_Opt *impl) { 443 CeedInt ierr; 444 CeedEvalMode eval_mode; 445 446 for (CeedInt i=0; i<num_input_fields; i++) { 447 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 448 CeedChkBackend(ierr); 449 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 450 } else { 451 ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i], 452 (const CeedScalar **) &impl->e_data[i]); 453 CeedChkBackend(ierr); 454 } 455 } 456 return CEED_ERROR_SUCCESS; 457 } 458 459 //------------------------------------------------------------------------------ 460 // Operator Apply 461 //------------------------------------------------------------------------------ 462 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec, 463 CeedVector out_vec, CeedRequest *request) { 464 int ierr; 465 Ceed ceed; 466 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 467 Ceed_Opt *ceed_impl; 468 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 469 CeedInt blk_size = ceed_impl->blk_size; 470 CeedOperator_Opt *impl; 471 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 472 CeedInt Q, num_input_fields, num_output_fields, num_elem; 473 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 474 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 475 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 476 CeedQFunction qf; 477 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 478 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 479 CeedChkBackend(ierr); 480 CeedOperatorField *op_input_fields, *op_output_fields; 481 ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields); 482 CeedChkBackend(ierr); 483 CeedQFunctionField *qf_input_fields, *qf_output_fields; 484 ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields); 485 CeedChkBackend(ierr); 486 CeedEvalMode eval_mode; 487 488 // Setup 489 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 490 491 // Input Evecs and Restriction 492 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 493 op_input_fields, in_vec, impl, request); 494 CeedChkBackend(ierr); 495 496 // Output Lvecs, Evecs, and Qvecs 497 for (CeedInt i=0; i<num_output_fields; i++) { 498 // Set Qvec if needed 499 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 500 CeedChkBackend(ierr); 501 if (eval_mode == CEED_EVAL_NONE) { 502 // Set qvec to single block evec 503 ierr = CeedVectorGetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 504 &impl->e_data[i + num_input_fields]); 505 CeedChkBackend(ierr); 506 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 507 CEED_USE_POINTER, impl->e_data[i + num_input_fields]); CeedChkBackend(ierr); 508 ierr = CeedVectorRestoreArray(impl->e_vecs_out[i], 509 &impl->e_data[i + num_input_fields]); 510 CeedChkBackend(ierr); 511 } 512 } 513 514 // Loop through elements 515 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 516 // Input basis apply 517 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 518 num_input_fields, blk_size, in_vec, false, 519 impl, request); CeedChkBackend(ierr); 520 521 // Q function 522 if (!impl->identity_qf) { 523 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 524 CeedChkBackend(ierr); 525 } 526 527 // Output basis apply and restrict 528 ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields, 529 blk_size, num_input_fields, num_output_fields, 530 op, out_vec, impl, request); 531 CeedChkBackend(ierr); 532 } 533 534 // Restore input arrays 535 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 536 op_input_fields, impl); 537 CeedChkBackend(ierr); 538 539 return CEED_ERROR_SUCCESS; 540 } 541 542 //------------------------------------------------------------------------------ 543 // Assemble Linear QFunction 544 //------------------------------------------------------------------------------ 545 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op, 546 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 547 int ierr; 548 Ceed ceed; 549 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 550 Ceed_Opt *ceed_impl; 551 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 552 const CeedInt blk_size = ceed_impl->blk_size; 553 CeedOperator_Opt *impl; 554 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 555 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 556 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 557 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 558 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 559 CeedQFunction qf; 560 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 561 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 562 CeedChkBackend(ierr); 563 CeedOperatorField *op_input_fields, *op_output_fields; 564 ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields); 565 CeedChkBackend(ierr); 566 CeedQFunctionField *qf_input_fields, *qf_output_fields; 567 ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields); 568 CeedChkBackend(ierr); 569 CeedVector vec, lvec; 570 CeedInt num_active_in = 0, num_active_out = 0; 571 CeedVector *active_in = NULL; 572 CeedScalar *a, *tmp; 573 574 // Setup 575 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 576 577 // Check for identity 578 if (impl->identity_qf) 579 // LCOV_EXCL_START 580 return CeedError(ceed, CEED_ERROR_BACKEND, 581 "Assembling identity qfunctions not supported"); 582 // LCOV_EXCL_STOP 583 584 // Input Evecs and Restriction 585 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 586 op_input_fields, NULL, impl, request); 587 CeedChkBackend(ierr); 588 589 // Count number of active input fields 590 for (CeedInt i=0; i<num_input_fields; i++) { 591 // Get input vector 592 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 593 CeedChkBackend(ierr); 594 // Check if active input 595 if (vec == CEED_VECTOR_ACTIVE) { 596 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 597 CeedChkBackend(ierr); 598 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 599 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 600 CeedChkBackend(ierr); 601 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 602 for (CeedInt field=0; field<size; field++) { 603 ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]); 604 CeedChkBackend(ierr); 605 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 606 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 607 CeedChkBackend(ierr); 608 } 609 num_active_in += size; 610 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 611 } 612 } 613 614 // Count number of active output fields 615 for (CeedInt i=0; i<num_output_fields; i++) { 616 // Get output vector 617 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 618 CeedChkBackend(ierr); 619 // Check if active output 620 if (vec == CEED_VECTOR_ACTIVE) { 621 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 622 CeedChkBackend(ierr); 623 num_active_out += size; 624 } 625 } 626 627 // Check sizes 628 if (!num_active_in || !num_active_out) 629 // LCOV_EXCL_START 630 return CeedError(ceed, CEED_ERROR_BACKEND, 631 "Cannot assemble QFunction without active inputs " 632 "and outputs"); 633 // LCOV_EXCL_STOP 634 635 // Setup lvec 636 ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, 637 &lvec); CeedChkBackend(ierr); 638 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 639 640 // Create output restriction 641 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 642 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 643 num_active_in*num_active_out, 644 num_active_in*num_active_out*num_elem*Q, 645 strides, rstr); CeedChkBackend(ierr); 646 // Create assembled vector 647 ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out, 648 assembled); CeedChkBackend(ierr); 649 650 // Loop through elements 651 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 652 // Input basis apply 653 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 654 num_input_fields, blk_size, NULL, true, 655 impl, request); CeedChkBackend(ierr); 656 657 // Assemble QFunction 658 for (CeedInt in=0; in<num_active_in; in++) { 659 // Set Inputs 660 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 661 if (num_active_in > 1) { 662 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 663 0.0); CeedChkBackend(ierr); 664 } 665 // Set Outputs 666 for (CeedInt out=0; out<num_output_fields; out++) { 667 // Get output vector 668 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 669 CeedChkBackend(ierr); 670 // Check if active output 671 if (vec == CEED_VECTOR_ACTIVE) { 672 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 673 CEED_USE_POINTER, a); CeedChkBackend(ierr); 674 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 675 CeedChkBackend(ierr); 676 a += size*Q*blk_size; // Advance the pointer by the size of the output 677 } 678 } 679 // Apply QFunction 680 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 681 CeedChkBackend(ierr); 682 } 683 } 684 685 // Un-set output Qvecs to prevent accidental overwrite of Assembled 686 for (CeedInt out=0; out<num_output_fields; out++) { 687 // Get output vector 688 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 689 CeedChkBackend(ierr); 690 // Check if active output 691 if (vec == CEED_VECTOR_ACTIVE) { 692 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 693 NULL); CeedChkBackend(ierr); 694 } 695 } 696 697 // Restore input arrays 698 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 699 op_input_fields, impl); 700 CeedChkBackend(ierr); 701 702 // Output blocked restriction 703 ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr); 704 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 705 CeedElemRestriction blk_rstr; 706 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 707 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 708 strides, &blk_rstr); CeedChkBackend(ierr); 709 ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled, 710 request); CeedChkBackend(ierr); 711 712 // Cleanup 713 for (CeedInt i=0; i<num_active_in; i++) { 714 ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr); 715 } 716 ierr = CeedFree(&active_in); CeedChkBackend(ierr); 717 ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr); 718 ierr = CeedElemRestrictionDestroy(&blk_rstr); CeedChkBackend(ierr); 719 720 return CEED_ERROR_SUCCESS; 721 } 722 723 //------------------------------------------------------------------------------ 724 // Operator Destroy 725 //------------------------------------------------------------------------------ 726 static int CeedOperatorDestroy_Opt(CeedOperator op) { 727 int ierr; 728 CeedOperator_Opt *impl; 729 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 730 731 for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) { 732 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 733 ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr); 734 } 735 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 736 ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr); 737 ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr); 738 ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr); 739 740 for (CeedInt i=0; i<impl->num_e_vecs_in; i++) { 741 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 742 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 743 } 744 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 745 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 746 747 for (CeedInt i=0; i<impl->num_e_vecs_out; i++) { 748 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 749 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 750 } 751 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 752 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 753 754 ierr = CeedFree(&impl); CeedChkBackend(ierr); 755 return CEED_ERROR_SUCCESS; 756 } 757 758 //------------------------------------------------------------------------------ 759 // Operator Create 760 //------------------------------------------------------------------------------ 761 int CeedOperatorCreate_Opt(CeedOperator op) { 762 int ierr; 763 Ceed ceed; 764 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 765 Ceed_Opt *ceed_impl; 766 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 767 CeedInt blk_size = ceed_impl->blk_size; 768 CeedOperator_Opt *impl; 769 770 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 771 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 772 773 if (blk_size != 1 && blk_size != 8) 774 // LCOV_EXCL_START 775 return CeedError(ceed, CEED_ERROR_BACKEND, 776 "Opt backend cannot use blocksize: %d", blk_size); 777 // LCOV_EXCL_STOP 778 779 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 780 CeedOperatorLinearAssembleQFunction_Opt); 781 CeedChkBackend(ierr); 782 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 783 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr); 784 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 785 CeedOperatorDestroy_Opt); CeedChkBackend(ierr); 786 return CEED_ERROR_SUCCESS; 787 } 788 //------------------------------------------------------------------------------ 789