1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stdint.h> 21 #include <string.h> 22 #include "ceed-opt.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 28 bool is_input, const CeedInt blk_size, 29 CeedElemRestriction *blk_restr, 30 CeedVector *e_vecs_full, CeedVector *e_vecs, 31 CeedVector *q_vecs, CeedInt start_e, 32 CeedInt num_fields, CeedInt Q) { 33 CeedInt ierr, num_comp, size, P; 34 CeedSize e_size, q_size; 35 Ceed ceed; 36 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 37 CeedBasis basis; 38 CeedElemRestriction r; 39 CeedOperatorField *op_fields; 40 CeedQFunctionField *qf_fields; 41 if (is_input) { 42 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); 43 CeedChkBackend(ierr); 44 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); 45 CeedChkBackend(ierr); 46 } else { 47 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields); 48 CeedChkBackend(ierr); 49 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 50 CeedChkBackend(ierr); 51 } 52 53 // Loop over fields 54 for (CeedInt i=0; i<num_fields; i++) { 55 CeedEvalMode eval_mode; 56 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 57 CeedChkBackend(ierr); 58 59 if (eval_mode != CEED_EVAL_WEIGHT) { 60 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 61 CeedChkBackend(ierr); 62 Ceed ceed; 63 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 64 CeedSize l_size; 65 CeedInt num_elem, elem_size, comp_stride; 66 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 67 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 68 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 69 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 70 71 bool strided; 72 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 73 if (strided) { 74 CeedInt strides[3]; 75 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 76 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 77 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 78 CeedChkBackend(ierr); 79 } else { 80 const CeedInt *offsets = NULL; 81 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 82 CeedChkBackend(ierr); 83 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 84 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 85 blk_size, num_comp, comp_stride, 86 l_size, CEED_MEM_HOST, 87 CEED_COPY_VALUES, offsets, 88 &blk_restr[i+start_e]); 89 CeedChkBackend(ierr); 90 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 91 } 92 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 93 &e_vecs_full[i+start_e]); 94 CeedChkBackend(ierr); 95 } 96 97 switch(eval_mode) { 98 case CEED_EVAL_NONE: 99 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 100 e_size = (CeedSize)Q*size*blk_size; 101 ierr = CeedVectorCreate(ceed, e_size, &e_vecs[i]); CeedChkBackend(ierr); 102 q_size = (CeedSize)Q*size*blk_size; 103 ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr); 104 break; 105 case CEED_EVAL_INTERP: 106 case CEED_EVAL_GRAD: 107 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 108 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 109 ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr); 110 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr); 111 e_size = (CeedSize)P*num_comp*blk_size; 112 ierr = CeedVectorCreate(ceed, e_size, &e_vecs[i]); CeedChkBackend(ierr); 113 q_size = (CeedSize)Q*size*blk_size; 114 ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr); 115 break; 116 case CEED_EVAL_WEIGHT: // Only on input fields 117 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 118 q_size = (CeedSize)Q*blk_size; 119 ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr); 120 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 121 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 122 CeedChkBackend(ierr); 123 124 break; 125 case CEED_EVAL_DIV: 126 break; // Not implemented 127 case CEED_EVAL_CURL: 128 break; // Not implemented 129 } 130 if (is_input && !!e_vecs[i]) { 131 ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST, 132 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr); 133 } 134 } 135 return CEED_ERROR_SUCCESS; 136 } 137 138 //------------------------------------------------------------------------------ 139 // Setup Operator 140 //------------------------------------------------------------------------------ 141 static int CeedOperatorSetup_Opt(CeedOperator op) { 142 int ierr; 143 bool is_setup_done; 144 ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr); 145 if (is_setup_done) return CEED_ERROR_SUCCESS; 146 Ceed ceed; 147 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 148 Ceed_Opt *ceed_impl; 149 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 150 const CeedInt blk_size = ceed_impl->blk_size; 151 CeedOperator_Opt *impl; 152 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 153 CeedQFunction qf; 154 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 155 CeedInt Q, num_input_fields, num_output_fields; 156 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 157 ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr); 158 CeedOperatorField *op_input_fields, *op_output_fields; 159 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 160 &num_output_fields, &op_output_fields); 161 CeedChkBackend(ierr); 162 CeedQFunctionField *qf_input_fields, *qf_output_fields; 163 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 164 &qf_output_fields); 165 CeedChkBackend(ierr); 166 167 // Allocate 168 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 169 CeedChkBackend(ierr); 170 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full); 171 CeedChkBackend(ierr); 172 173 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr); 174 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr); 175 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr); 176 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr); 177 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr); 178 179 impl->num_inputs = num_input_fields; 180 impl->num_outputs = num_output_fields; 181 182 // Set up infield and outfield pointer arrays 183 // Infields 184 ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr, 185 impl->e_vecs_full, impl->e_vecs_in, 186 impl->q_vecs_in, 0, num_input_fields, Q); 187 CeedChkBackend(ierr); 188 // Outfields 189 ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr, 190 impl->e_vecs_full, impl->e_vecs_out, 191 impl->q_vecs_out, num_input_fields, 192 num_output_fields, Q); 193 CeedChkBackend(ierr); 194 195 // Identity QFunctions 196 if (impl->is_identity_qf) { 197 CeedEvalMode in_mode, out_mode; 198 CeedQFunctionField *in_fields, *out_fields; 199 ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields); 200 CeedChkBackend(ierr); 201 ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode); 202 CeedChkBackend(ierr); 203 ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode); 204 CeedChkBackend(ierr); 205 206 if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) { 207 impl->is_identity_restr_op = true; 208 } else { 209 ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr); 210 impl->q_vecs_out[0] = impl->q_vecs_in[0]; 211 ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr); 212 } 213 } 214 215 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 216 217 return CEED_ERROR_SUCCESS; 218 } 219 220 //------------------------------------------------------------------------------ 221 // Setup Input Fields 222 //------------------------------------------------------------------------------ 223 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, 224 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 225 CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl, 226 CeedRequest *request) { 227 CeedInt ierr; 228 CeedEvalMode eval_mode; 229 CeedVector vec; 230 uint64_t state; 231 232 for (CeedInt i=0; i<num_input_fields; i++) { 233 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 234 CeedChkBackend(ierr); 235 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 236 } else { 237 // Get input vector 238 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 239 CeedChkBackend(ierr); 240 if (vec != CEED_VECTOR_ACTIVE) { 241 // Restrict 242 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 243 if (state != impl->input_states[i]) { 244 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 245 vec, impl->e_vecs_full[i], request); 246 CeedChkBackend(ierr); 247 impl->input_states[i] = state; 248 } 249 // Get evec 250 ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, 251 (const CeedScalar **) &e_data[i]); 252 CeedChkBackend(ierr); 253 } else { 254 // Set Qvec for CEED_EVAL_NONE 255 if (eval_mode == CEED_EVAL_NONE) { 256 ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST, 257 (const CeedScalar **)&e_data[i]); 258 CeedChkBackend(ierr); 259 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 260 CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr); 261 ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i], 262 (const CeedScalar **)&e_data[i]); 263 CeedChkBackend(ierr); 264 } 265 } 266 } 267 } 268 return CEED_ERROR_SUCCESS; 269 } 270 271 //------------------------------------------------------------------------------ 272 // Input Basis Action 273 //------------------------------------------------------------------------------ 274 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 275 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 276 CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active, 277 CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl, 278 CeedRequest *request) { 279 CeedInt ierr; 280 CeedInt dim, elem_size, size; 281 CeedElemRestriction elem_restr; 282 CeedEvalMode eval_mode; 283 CeedBasis basis; 284 CeedVector vec; 285 286 for (CeedInt i=0; i<num_input_fields; i++) { 287 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 288 CeedChkBackend(ierr); 289 // Skip active input 290 if (skip_active) { 291 if (vec == CEED_VECTOR_ACTIVE) 292 continue; 293 } 294 295 CeedInt active_in = 0; 296 // Get elem_size, eval_mode, size 297 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 298 CeedChkBackend(ierr); 299 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 300 CeedChkBackend(ierr); 301 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 302 CeedChkBackend(ierr); 303 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 304 CeedChkBackend(ierr); 305 // Restrict block active input 306 if (vec == CEED_VECTOR_ACTIVE) { 307 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size, 308 CEED_NOTRANSPOSE, in_vec, 309 impl->e_vecs_in[i], request); 310 CeedChkBackend(ierr); 311 active_in = 1; 312 } 313 // Basis action 314 switch(eval_mode) { 315 case CEED_EVAL_NONE: 316 if (!active_in) { 317 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 318 CEED_USE_POINTER, &e_data[i][e*Q*size]); 319 CeedChkBackend(ierr); 320 } 321 break; 322 case CEED_EVAL_INTERP: 323 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 324 CeedChkBackend(ierr); 325 if (!active_in) { 326 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 327 CEED_USE_POINTER, &e_data[i][e*elem_size*size]); 328 CeedChkBackend(ierr); 329 } 330 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 331 CEED_EVAL_INTERP, impl->e_vecs_in[i], 332 impl->q_vecs_in[i]); CeedChkBackend(ierr); 333 break; 334 case CEED_EVAL_GRAD: 335 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 336 CeedChkBackend(ierr); 337 if (!active_in) { 338 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 339 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 340 CEED_USE_POINTER, 341 &e_data[i][e*elem_size*size/dim]); 342 CeedChkBackend(ierr); 343 } 344 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 345 CEED_EVAL_GRAD, impl->e_vecs_in[i], 346 impl->q_vecs_in[i]); CeedChkBackend(ierr); 347 break; 348 case CEED_EVAL_WEIGHT: 349 break; // No action 350 // LCOV_EXCL_START 351 case CEED_EVAL_DIV: 352 case CEED_EVAL_CURL: { 353 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 354 CeedChkBackend(ierr); 355 Ceed ceed; 356 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 357 return CeedError(ceed, CEED_ERROR_BACKEND, 358 "Ceed evaluation mode not implemented"); 359 // LCOV_EXCL_STOP 360 } 361 } 362 } 363 return CEED_ERROR_SUCCESS; 364 } 365 366 //------------------------------------------------------------------------------ 367 // Output Basis Action 368 //------------------------------------------------------------------------------ 369 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 370 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 371 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 372 CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl, 373 CeedRequest *request) { 374 CeedInt ierr; 375 CeedElemRestriction elem_restr; 376 CeedEvalMode eval_mode; 377 CeedBasis basis; 378 CeedVector vec; 379 380 for (CeedInt i=0; i<num_output_fields; i++) { 381 // Get elem_size, eval_mode, size 382 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 383 CeedChkBackend(ierr); 384 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 385 CeedChkBackend(ierr); 386 // Basis action 387 switch(eval_mode) { 388 case CEED_EVAL_NONE: 389 break; // No action 390 case CEED_EVAL_INTERP: 391 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 392 CeedChkBackend(ierr); 393 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 394 CEED_EVAL_INTERP, impl->q_vecs_out[i], 395 impl->e_vecs_out[i]); CeedChkBackend(ierr); 396 break; 397 case CEED_EVAL_GRAD: 398 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 399 CeedChkBackend(ierr); 400 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 401 CEED_EVAL_GRAD, impl->q_vecs_out[i], 402 impl->e_vecs_out[i]); CeedChkBackend(ierr); 403 break; 404 // LCOV_EXCL_START 405 case CEED_EVAL_WEIGHT: { 406 Ceed ceed; 407 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 408 return CeedError(ceed, CEED_ERROR_BACKEND, 409 "CEED_EVAL_WEIGHT cannot be an output " 410 "evaluation mode"); 411 } 412 case CEED_EVAL_DIV: 413 case CEED_EVAL_CURL: { 414 Ceed ceed; 415 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 416 return CeedError(ceed, CEED_ERROR_BACKEND, 417 "Ceed evaluation mode not implemented"); 418 // LCOV_EXCL_STOP 419 } 420 } 421 // Restrict output block 422 // Get output vector 423 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 424 CeedChkBackend(ierr); 425 if (vec == CEED_VECTOR_ACTIVE) 426 vec = out_vec; 427 // Restrict 428 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs], 429 e/blk_size, CEED_TRANSPOSE, 430 impl->e_vecs_out[i], vec, request); 431 CeedChkBackend(ierr); 432 } 433 return CEED_ERROR_SUCCESS; 434 } 435 436 //------------------------------------------------------------------------------ 437 // Restore Input Vectors 438 //------------------------------------------------------------------------------ 439 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields, 440 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 441 CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) { 442 CeedInt ierr; 443 444 for (CeedInt i=0; i<num_input_fields; i++) { 445 CeedEvalMode eval_mode; 446 CeedVector vec; 447 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 448 CeedChkBackend(ierr); 449 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 450 CeedChkBackend(ierr); 451 if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) { 452 ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i], 453 (const CeedScalar **) &e_data[i]); 454 CeedChkBackend(ierr); 455 } 456 } 457 return CEED_ERROR_SUCCESS; 458 } 459 460 //------------------------------------------------------------------------------ 461 // Operator Apply 462 //------------------------------------------------------------------------------ 463 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec, 464 CeedVector out_vec, CeedRequest *request) { 465 int ierr; 466 Ceed ceed; 467 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 468 Ceed_Opt *ceed_impl; 469 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 470 CeedInt blk_size = ceed_impl->blk_size; 471 CeedOperator_Opt *impl; 472 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 473 CeedInt Q, num_input_fields, num_output_fields, num_elem; 474 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 475 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 476 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 477 CeedQFunction qf; 478 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 479 CeedOperatorField *op_input_fields, *op_output_fields; 480 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 481 &num_output_fields, &op_output_fields); 482 CeedChkBackend(ierr); 483 CeedQFunctionField *qf_input_fields, *qf_output_fields; 484 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 485 &qf_output_fields); 486 CeedChkBackend(ierr); 487 CeedEvalMode eval_mode; 488 CeedScalar *e_data[2*CEED_FIELD_MAX] = {0}; 489 490 // Setup 491 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 492 493 // Restriction only operator 494 if (impl->is_identity_restr_op) { 495 for (CeedInt b=0; b<num_blks; b++) { 496 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE, 497 in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr); 498 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE, 499 impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr); 500 } 501 return CEED_ERROR_SUCCESS; 502 } 503 504 // Input Evecs and Restriction 505 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 506 op_input_fields, in_vec, e_data, 507 impl, request); CeedChkBackend(ierr); 508 509 // Output Lvecs, Evecs, and Qvecs 510 for (CeedInt i=0; i<num_output_fields; i++) { 511 // Set Qvec if needed 512 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 513 CeedChkBackend(ierr); 514 if (eval_mode == CEED_EVAL_NONE) { 515 // Set qvec to single block evec 516 ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST, 517 &e_data[i + num_input_fields]); 518 CeedChkBackend(ierr); 519 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 520 CEED_USE_POINTER, e_data[i + num_input_fields]); 521 CeedChkBackend(ierr); 522 ierr = CeedVectorRestoreArray(impl->e_vecs_out[i], 523 &e_data[i + num_input_fields]); 524 CeedChkBackend(ierr); 525 } 526 } 527 528 // Loop through elements 529 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 530 // Input basis apply 531 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 532 num_input_fields, blk_size, in_vec, false, 533 e_data, impl, request); CeedChkBackend(ierr); 534 535 // Q function 536 if (!impl->is_identity_qf) { 537 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 538 CeedChkBackend(ierr); 539 } 540 541 // Output basis apply and restrict 542 ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields, 543 blk_size, num_input_fields, num_output_fields, 544 op, out_vec, impl, request); 545 CeedChkBackend(ierr); 546 } 547 548 // Restore input arrays 549 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 550 op_input_fields, e_data, impl); 551 CeedChkBackend(ierr); 552 553 return CEED_ERROR_SUCCESS; 554 } 555 556 //------------------------------------------------------------------------------ 557 // Core code for linear QFunction assembly 558 //------------------------------------------------------------------------------ 559 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, 560 bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, 561 CeedRequest *request) { 562 int ierr; 563 Ceed ceed; 564 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 565 Ceed_Opt *ceed_impl; 566 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 567 const CeedInt blk_size = ceed_impl->blk_size; 568 CeedSize q_size; 569 CeedOperator_Opt *impl; 570 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 571 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 572 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 573 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 574 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 575 CeedQFunction qf; 576 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 577 CeedOperatorField *op_input_fields, *op_output_fields; 578 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 579 &num_output_fields, &op_output_fields); 580 CeedChkBackend(ierr); 581 CeedQFunctionField *qf_input_fields, *qf_output_fields; 582 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 583 &qf_output_fields); 584 CeedChkBackend(ierr); 585 CeedVector vec, l_vec = impl->qf_l_vec; 586 CeedInt num_active_in = impl->num_active_in, 587 num_active_out = impl->num_active_out; 588 CeedVector *active_in = impl->qf_active_in; 589 CeedScalar *a, *tmp; 590 CeedScalar *e_data[2*CEED_FIELD_MAX] = {0}; 591 592 // Setup 593 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 594 595 // Check for identity 596 if (impl->is_identity_qf) 597 // LCOV_EXCL_START 598 return CeedError(ceed, CEED_ERROR_BACKEND, 599 "Assembling identity qfunctions not supported"); 600 // LCOV_EXCL_STOP 601 602 // Input Evecs and Restriction 603 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 604 op_input_fields, NULL, e_data, 605 impl, request); CeedChkBackend(ierr); 606 607 // Count number of active input fields 608 if (!num_active_in) { 609 for (CeedInt i=0; i<num_input_fields; i++) { 610 // Get input vector 611 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 612 CeedChkBackend(ierr); 613 // Check if active input 614 if (vec == CEED_VECTOR_ACTIVE) { 615 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 616 CeedChkBackend(ierr); 617 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 618 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 619 CeedChkBackend(ierr); 620 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 621 for (CeedInt field=0; field<size; field++) { 622 q_size = (CeedSize)Q*blk_size; 623 ierr = CeedVectorCreate(ceed, q_size, &active_in[num_active_in+field]); 624 CeedChkBackend(ierr); 625 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 626 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 627 CeedChkBackend(ierr); 628 } 629 num_active_in += size; 630 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 631 } 632 } 633 impl->num_active_in = num_active_in; 634 impl->qf_active_in = active_in; 635 } 636 637 // Count number of active output fields 638 if (!num_active_out) { 639 for (CeedInt i=0; i<num_output_fields; i++) { 640 // Get output vector 641 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 642 CeedChkBackend(ierr); 643 // Check if active output 644 if (vec == CEED_VECTOR_ACTIVE) { 645 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 646 CeedChkBackend(ierr); 647 num_active_out += size; 648 } 649 } 650 impl->num_active_out = num_active_out; 651 } 652 653 // Check sizes 654 if (!num_active_in || !num_active_out) 655 // LCOV_EXCL_START 656 return CeedError(ceed, CEED_ERROR_BACKEND, 657 "Cannot assemble QFunction without active inputs " 658 "and outputs"); 659 // LCOV_EXCL_STOP 660 661 // Setup l_vec 662 if (!l_vec) { 663 CeedSize l_size = (CeedSize)blk_size*Q*num_active_in*num_active_out; 664 ierr = CeedVectorCreate(ceed, l_size, &l_vec); CeedChkBackend(ierr); 665 ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr); 666 impl->qf_l_vec = l_vec; 667 } 668 669 // Build objects if needed 670 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 671 if (build_objects) { 672 // Create output restriction 673 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 674 num_active_in*num_active_out, 675 num_active_in*num_active_out*num_elem*Q, 676 strides, rstr); CeedChkBackend(ierr); 677 // Create assembled vector 678 CeedSize l_size = (CeedSize)num_elem*Q*num_active_in*num_active_out; 679 ierr = CeedVectorCreate(ceed, l_size, assembled); CeedChkBackend(ierr); 680 } 681 682 // Output blocked restriction 683 CeedElemRestriction blk_rstr = impl->qf_blk_rstr; 684 if (!blk_rstr) { 685 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 686 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 687 strides, &blk_rstr); CeedChkBackend(ierr); 688 impl->qf_blk_rstr = blk_rstr; 689 } 690 691 // Loop through elements 692 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 693 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 694 ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 695 696 // Input basis apply 697 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 698 num_input_fields, blk_size, NULL, true, 699 e_data, impl, request); CeedChkBackend(ierr); 700 701 // Assemble QFunction 702 for (CeedInt in=0; in<num_active_in; in++) { 703 // Set Inputs 704 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 705 if (num_active_in > 1) { 706 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 707 0.0); CeedChkBackend(ierr); 708 } 709 // Set Outputs 710 for (CeedInt out=0; out<num_output_fields; out++) { 711 // Get output vector 712 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 713 CeedChkBackend(ierr); 714 // Check if active output 715 if (vec == CEED_VECTOR_ACTIVE) { 716 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 717 CEED_USE_POINTER, a); CeedChkBackend(ierr); 718 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 719 CeedChkBackend(ierr); 720 a += size*Q*blk_size; // Advance the pointer by the size of the output 721 } 722 } 723 // Apply QFunction 724 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 725 CeedChkBackend(ierr); 726 } 727 728 // Assemble into assembled vector 729 ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr); 730 ierr = CeedElemRestrictionApplyBlock(blk_rstr, e/blk_size, CEED_TRANSPOSE, 731 l_vec, *assembled, request); CeedChkBackend(ierr); 732 } 733 734 // Un-set output Qvecs to prevent accidental overwrite of Assembled 735 for (CeedInt out=0; out<num_output_fields; out++) { 736 // Get output vector 737 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 738 CeedChkBackend(ierr); 739 // Check if active output 740 if (vec == CEED_VECTOR_ACTIVE) { 741 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 742 NULL); CeedChkBackend(ierr); 743 } 744 } 745 746 // Restore input arrays 747 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 748 op_input_fields, e_data, impl); 749 CeedChkBackend(ierr); 750 751 return CEED_ERROR_SUCCESS; 752 } 753 754 //------------------------------------------------------------------------------ 755 // Assemble Linear QFunction 756 //------------------------------------------------------------------------------ 757 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op, 758 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 759 return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr, 760 request); 761 } 762 763 //------------------------------------------------------------------------------ 764 // Update Assembled Linear QFunction 765 //------------------------------------------------------------------------------ 766 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op, 767 CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 768 return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled, 769 &rstr, request); 770 } 771 772 //------------------------------------------------------------------------------ 773 // Operator Destroy 774 //------------------------------------------------------------------------------ 775 static int CeedOperatorDestroy_Opt(CeedOperator op) { 776 int ierr; 777 CeedOperator_Opt *impl; 778 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 779 780 for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) { 781 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 782 ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr); 783 } 784 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 785 ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr); 786 ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr); 787 788 for (CeedInt i=0; i<impl->num_inputs; i++) { 789 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 790 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 791 } 792 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 793 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 794 795 for (CeedInt i=0; i<impl->num_outputs; i++) { 796 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 797 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 798 } 799 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 800 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 801 802 // QFunction assembly data 803 for (CeedInt i=0; i<impl->num_active_in; i++) { 804 ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr); 805 } 806 ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr); 807 ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr); 808 ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr); 809 810 ierr = CeedFree(&impl); CeedChkBackend(ierr); 811 return CEED_ERROR_SUCCESS; 812 } 813 814 //------------------------------------------------------------------------------ 815 // Operator Create 816 //------------------------------------------------------------------------------ 817 int CeedOperatorCreate_Opt(CeedOperator op) { 818 int ierr; 819 Ceed ceed; 820 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 821 Ceed_Opt *ceed_impl; 822 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 823 CeedInt blk_size = ceed_impl->blk_size; 824 CeedOperator_Opt *impl; 825 826 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 827 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 828 829 if (blk_size != 1 && blk_size != 8) 830 // LCOV_EXCL_START 831 return CeedError(ceed, CEED_ERROR_BACKEND, 832 "Opt backend cannot use blocksize: %d", blk_size); 833 // LCOV_EXCL_STOP 834 835 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 836 CeedOperatorLinearAssembleQFunction_Opt); 837 CeedChkBackend(ierr); 838 ierr = CeedSetBackendFunction(ceed, "Operator", op, 839 "LinearAssembleQFunctionUpdate", 840 CeedOperatorLinearAssembleQFunctionUpdate_Opt); 841 CeedChkBackend(ierr); 842 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 843 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr); 844 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 845 CeedOperatorDestroy_Opt); CeedChkBackend(ierr); 846 return CEED_ERROR_SUCCESS; 847 } 848 //------------------------------------------------------------------------------ 849