1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stdint.h> 21 #include <string.h> 22 #include "ceed-opt.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 28 bool is_input, const CeedInt blk_size, 29 CeedElemRestriction *blk_restr, 30 CeedVector *e_vecs_full, CeedVector *e_vecs, 31 CeedVector *q_vecs, CeedInt start_e, 32 CeedInt num_fields, CeedInt Q) { 33 CeedInt dim, ierr, num_comp, size, P; 34 Ceed ceed; 35 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 36 CeedBasis basis; 37 CeedElemRestriction r; 38 CeedOperatorField *op_fields; 39 CeedQFunctionField *qf_fields; 40 if (is_input) { 41 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); 42 CeedChkBackend(ierr); 43 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); 44 CeedChkBackend(ierr); 45 } else { 46 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields); 47 CeedChkBackend(ierr); 48 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 49 CeedChkBackend(ierr); 50 } 51 52 // Loop over fields 53 for (CeedInt i=0; i<num_fields; i++) { 54 CeedEvalMode eval_mode; 55 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 56 CeedChkBackend(ierr); 57 58 if (eval_mode != CEED_EVAL_WEIGHT) { 59 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 60 CeedChkBackend(ierr); 61 Ceed ceed; 62 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 63 CeedInt num_elem, elem_size, l_size, comp_stride; 64 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 65 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 66 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 67 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 68 69 bool strided; 70 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 71 if (strided) { 72 CeedInt strides[3]; 73 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 74 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 75 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 76 CeedChkBackend(ierr); 77 } else { 78 const CeedInt *offsets = NULL; 79 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 80 CeedChkBackend(ierr); 81 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 82 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 83 blk_size, num_comp, comp_stride, 84 l_size, CEED_MEM_HOST, 85 CEED_COPY_VALUES, offsets, 86 &blk_restr[i+start_e]); 87 CeedChkBackend(ierr); 88 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 89 } 90 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 91 &e_vecs_full[i+start_e]); 92 CeedChkBackend(ierr); 93 } 94 95 switch(eval_mode) { 96 case CEED_EVAL_NONE: 97 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 98 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]); 99 CeedChkBackend(ierr); 100 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 101 CeedChkBackend(ierr); 102 break; 103 case CEED_EVAL_INTERP: 104 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 105 ierr = CeedElemRestrictionGetElementSize(r, &P); 106 CeedChkBackend(ierr); 107 ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]); 108 CeedChkBackend(ierr); 109 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 110 CeedChkBackend(ierr); 111 break; 112 case CEED_EVAL_GRAD: 113 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 114 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 115 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 116 ierr = CeedElemRestrictionGetElementSize(r, &P); 117 CeedChkBackend(ierr); 118 ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]); 119 CeedChkBackend(ierr); 120 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 121 CeedChkBackend(ierr); 122 break; 123 case CEED_EVAL_WEIGHT: // Only on input fields 124 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 125 ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr); 126 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 127 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 128 CeedChkBackend(ierr); 129 130 break; 131 case CEED_EVAL_DIV: 132 break; // Not implemented 133 case CEED_EVAL_CURL: 134 break; // Not implemented 135 } 136 } 137 return CEED_ERROR_SUCCESS; 138 } 139 140 //------------------------------------------------------------------------------ 141 // Setup Operator 142 //------------------------------------------------------------------------------ 143 static int CeedOperatorSetup_Opt(CeedOperator op) { 144 int ierr; 145 bool is_setup_done; 146 ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr); 147 if (is_setup_done) return CEED_ERROR_SUCCESS; 148 Ceed ceed; 149 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 150 Ceed_Opt *ceed_impl; 151 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 152 const CeedInt blk_size = ceed_impl->blk_size; 153 CeedOperator_Opt *impl; 154 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 155 CeedQFunction qf; 156 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 157 CeedInt Q, num_input_fields, num_output_fields; 158 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 159 ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr); 160 CeedOperatorField *op_input_fields, *op_output_fields; 161 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 162 &num_output_fields, &op_output_fields); 163 CeedChkBackend(ierr); 164 CeedQFunctionField *qf_input_fields, *qf_output_fields; 165 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 166 &qf_output_fields); 167 CeedChkBackend(ierr); 168 169 // Allocate 170 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 171 CeedChkBackend(ierr); 172 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full); 173 CeedChkBackend(ierr); 174 175 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr); 176 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr); 177 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr); 178 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr); 179 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr); 180 181 impl->num_inputs = num_input_fields; 182 impl->num_outputs = num_output_fields; 183 184 // Set up infield and outfield pointer arrays 185 // Infields 186 ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr, 187 impl->e_vecs_full, impl->e_vecs_in, 188 impl->q_vecs_in, 0, num_input_fields, Q); 189 CeedChkBackend(ierr); 190 // Outfields 191 ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr, 192 impl->e_vecs_full, impl->e_vecs_out, 193 impl->q_vecs_out, num_input_fields, 194 num_output_fields, Q); 195 CeedChkBackend(ierr); 196 197 // Identity QFunctions 198 if (impl->is_identity_qf) { 199 CeedEvalMode in_mode, out_mode; 200 CeedQFunctionField *in_fields, *out_fields; 201 ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields); 202 CeedChkBackend(ierr); 203 ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode); 204 CeedChkBackend(ierr); 205 ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode); 206 CeedChkBackend(ierr); 207 208 if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) { 209 impl->is_identity_restr_op = true; 210 } else { 211 ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr); 212 impl->q_vecs_out[0] = impl->q_vecs_in[0]; 213 ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr); 214 } 215 } 216 217 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 218 219 return CEED_ERROR_SUCCESS; 220 } 221 222 //------------------------------------------------------------------------------ 223 // Setup Input Fields 224 //------------------------------------------------------------------------------ 225 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, 226 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 227 CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl, 228 CeedRequest *request) { 229 CeedInt ierr; 230 CeedEvalMode eval_mode; 231 CeedVector vec; 232 uint64_t state; 233 234 for (CeedInt i=0; i<num_input_fields; i++) { 235 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 236 CeedChkBackend(ierr); 237 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 238 } else { 239 // Get input vector 240 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 241 CeedChkBackend(ierr); 242 if (vec != CEED_VECTOR_ACTIVE) { 243 // Restrict 244 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 245 if (state != impl->input_states[i]) { 246 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 247 vec, impl->e_vecs_full[i], request); 248 CeedChkBackend(ierr); 249 impl->input_states[i] = state; 250 } 251 } else { 252 // Set Qvec for CEED_EVAL_NONE 253 if (eval_mode == CEED_EVAL_NONE) { 254 ierr = CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 255 &e_data[i]); CeedChkBackend(ierr); 256 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 257 CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr); 258 ierr = CeedVectorRestoreArray(impl->e_vecs_in[i], 259 &e_data[i]); CeedChkBackend(ierr); 260 } 261 } 262 // Get evec 263 ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, 264 (const CeedScalar **) &e_data[i]); 265 CeedChkBackend(ierr); 266 } 267 } 268 return CEED_ERROR_SUCCESS; 269 } 270 271 //------------------------------------------------------------------------------ 272 // Input Basis Action 273 //------------------------------------------------------------------------------ 274 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 275 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 276 CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active, 277 CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl, 278 CeedRequest *request) { 279 CeedInt ierr; 280 CeedInt dim, elem_size, size; 281 CeedElemRestriction elem_restr; 282 CeedEvalMode eval_mode; 283 CeedBasis basis; 284 CeedVector vec; 285 286 for (CeedInt i=0; i<num_input_fields; i++) { 287 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 288 CeedChkBackend(ierr); 289 // Skip active input 290 if (skip_active) { 291 if (vec == CEED_VECTOR_ACTIVE) 292 continue; 293 } 294 295 CeedInt active_in = 0; 296 // Get elem_size, eval_mode, size 297 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 298 CeedChkBackend(ierr); 299 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 300 CeedChkBackend(ierr); 301 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 302 CeedChkBackend(ierr); 303 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 304 CeedChkBackend(ierr); 305 // Restrict block active input 306 if (vec == CEED_VECTOR_ACTIVE) { 307 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size, 308 CEED_NOTRANSPOSE, in_vec, 309 impl->e_vecs_in[i], request); 310 CeedChkBackend(ierr); 311 active_in = 1; 312 } 313 // Basis action 314 switch(eval_mode) { 315 case CEED_EVAL_NONE: 316 if (!active_in) { 317 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 318 CEED_USE_POINTER, &e_data[i][e*Q*size]); 319 CeedChkBackend(ierr); 320 } 321 break; 322 case CEED_EVAL_INTERP: 323 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 324 CeedChkBackend(ierr); 325 if (!active_in) { 326 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 327 CEED_USE_POINTER, &e_data[i][e*elem_size*size]); 328 CeedChkBackend(ierr); 329 } 330 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 331 CEED_EVAL_INTERP, impl->e_vecs_in[i], 332 impl->q_vecs_in[i]); CeedChkBackend(ierr); 333 break; 334 case CEED_EVAL_GRAD: 335 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 336 CeedChkBackend(ierr); 337 if (!active_in) { 338 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 339 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 340 CEED_USE_POINTER, 341 &e_data[i][e*elem_size*size/dim]); 342 CeedChkBackend(ierr); 343 } 344 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 345 CEED_EVAL_GRAD, impl->e_vecs_in[i], 346 impl->q_vecs_in[i]); CeedChkBackend(ierr); 347 break; 348 case CEED_EVAL_WEIGHT: 349 break; // No action 350 // LCOV_EXCL_START 351 case CEED_EVAL_DIV: 352 case CEED_EVAL_CURL: { 353 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 354 CeedChkBackend(ierr); 355 Ceed ceed; 356 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 357 return CeedError(ceed, CEED_ERROR_BACKEND, 358 "Ceed evaluation mode not implemented"); 359 // LCOV_EXCL_STOP 360 } 361 } 362 } 363 return CEED_ERROR_SUCCESS; 364 } 365 366 //------------------------------------------------------------------------------ 367 // Output Basis Action 368 //------------------------------------------------------------------------------ 369 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 370 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 371 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 372 CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl, 373 CeedRequest *request) { 374 CeedInt ierr; 375 CeedElemRestriction elem_restr; 376 CeedEvalMode eval_mode; 377 CeedBasis basis; 378 CeedVector vec; 379 380 for (CeedInt i=0; i<num_output_fields; i++) { 381 // Get elem_size, eval_mode, size 382 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 383 CeedChkBackend(ierr); 384 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 385 CeedChkBackend(ierr); 386 // Basis action 387 switch(eval_mode) { 388 case CEED_EVAL_NONE: 389 break; // No action 390 case CEED_EVAL_INTERP: 391 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 392 CeedChkBackend(ierr); 393 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 394 CEED_EVAL_INTERP, impl->q_vecs_out[i], 395 impl->e_vecs_out[i]); CeedChkBackend(ierr); 396 break; 397 case CEED_EVAL_GRAD: 398 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 399 CeedChkBackend(ierr); 400 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 401 CEED_EVAL_GRAD, impl->q_vecs_out[i], 402 impl->e_vecs_out[i]); CeedChkBackend(ierr); 403 break; 404 // LCOV_EXCL_START 405 case CEED_EVAL_WEIGHT: { 406 Ceed ceed; 407 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 408 return CeedError(ceed, CEED_ERROR_BACKEND, 409 "CEED_EVAL_WEIGHT cannot be an output " 410 "evaluation mode"); 411 } 412 case CEED_EVAL_DIV: 413 case CEED_EVAL_CURL: { 414 Ceed ceed; 415 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 416 return CeedError(ceed, CEED_ERROR_BACKEND, 417 "Ceed evaluation mode not implemented"); 418 // LCOV_EXCL_STOP 419 } 420 } 421 // Restrict output block 422 // Get output vector 423 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 424 CeedChkBackend(ierr); 425 if (vec == CEED_VECTOR_ACTIVE) 426 vec = out_vec; 427 // Restrict 428 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs], 429 e/blk_size, CEED_TRANSPOSE, 430 impl->e_vecs_out[i], vec, request); 431 CeedChkBackend(ierr); 432 } 433 return CEED_ERROR_SUCCESS; 434 } 435 436 //------------------------------------------------------------------------------ 437 // Restore Input Vectors 438 //------------------------------------------------------------------------------ 439 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields, 440 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 441 CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) { 442 CeedInt ierr; 443 CeedEvalMode eval_mode; 444 445 for (CeedInt i=0; i<num_input_fields; i++) { 446 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 447 CeedChkBackend(ierr); 448 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 449 } else { 450 ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i], 451 (const CeedScalar **) &e_data[i]); 452 CeedChkBackend(ierr); 453 } 454 } 455 return CEED_ERROR_SUCCESS; 456 } 457 458 //------------------------------------------------------------------------------ 459 // Operator Apply 460 //------------------------------------------------------------------------------ 461 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec, 462 CeedVector out_vec, CeedRequest *request) { 463 int ierr; 464 Ceed ceed; 465 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 466 Ceed_Opt *ceed_impl; 467 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 468 CeedInt blk_size = ceed_impl->blk_size; 469 CeedOperator_Opt *impl; 470 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 471 CeedInt Q, num_input_fields, num_output_fields, num_elem; 472 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 473 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 474 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 475 CeedQFunction qf; 476 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 477 CeedOperatorField *op_input_fields, *op_output_fields; 478 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 479 &num_output_fields, &op_output_fields); 480 CeedChkBackend(ierr); 481 CeedQFunctionField *qf_input_fields, *qf_output_fields; 482 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 483 &qf_output_fields); 484 CeedChkBackend(ierr); 485 CeedEvalMode eval_mode; 486 CeedScalar *e_data[2*CEED_FIELD_MAX] = {0}; 487 488 // Setup 489 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 490 491 // Restriction only operator 492 if (impl->is_identity_restr_op) { 493 for (CeedInt b=0; b<num_blks; b++) { 494 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE, 495 in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr); 496 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE, 497 impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr); 498 } 499 return CEED_ERROR_SUCCESS; 500 } 501 502 // Input Evecs and Restriction 503 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 504 op_input_fields, in_vec, e_data, 505 impl, request); CeedChkBackend(ierr); 506 507 // Output Lvecs, Evecs, and Qvecs 508 for (CeedInt i=0; i<num_output_fields; i++) { 509 // Set Qvec if needed 510 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 511 CeedChkBackend(ierr); 512 if (eval_mode == CEED_EVAL_NONE) { 513 // Set qvec to single block evec 514 ierr = CeedVectorGetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 515 &e_data[i + num_input_fields]); 516 CeedChkBackend(ierr); 517 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 518 CEED_USE_POINTER, e_data[i + num_input_fields]); 519 CeedChkBackend(ierr); 520 ierr = CeedVectorRestoreArray(impl->e_vecs_out[i], 521 &e_data[i + num_input_fields]); 522 CeedChkBackend(ierr); 523 } 524 } 525 526 // Loop through elements 527 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 528 // Input basis apply 529 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 530 num_input_fields, blk_size, in_vec, false, 531 e_data, impl, request); CeedChkBackend(ierr); 532 533 // Q function 534 if (!impl->is_identity_qf) { 535 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 536 CeedChkBackend(ierr); 537 } 538 539 // Output basis apply and restrict 540 ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields, 541 blk_size, num_input_fields, num_output_fields, 542 op, out_vec, impl, request); 543 CeedChkBackend(ierr); 544 } 545 546 // Restore input arrays 547 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 548 op_input_fields, e_data, impl); 549 CeedChkBackend(ierr); 550 551 return CEED_ERROR_SUCCESS; 552 } 553 554 //------------------------------------------------------------------------------ 555 // Core code for linear QFunction assembly 556 //------------------------------------------------------------------------------ 557 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, 558 bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, 559 CeedRequest *request) { 560 int ierr; 561 Ceed ceed; 562 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 563 Ceed_Opt *ceed_impl; 564 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 565 const CeedInt blk_size = ceed_impl->blk_size; 566 CeedOperator_Opt *impl; 567 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 568 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 569 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 570 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 571 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 572 CeedQFunction qf; 573 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 574 CeedOperatorField *op_input_fields, *op_output_fields; 575 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 576 &num_output_fields, &op_output_fields); 577 CeedChkBackend(ierr); 578 CeedQFunctionField *qf_input_fields, *qf_output_fields; 579 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 580 &qf_output_fields); 581 CeedChkBackend(ierr); 582 CeedVector vec, l_vec = impl->qf_l_vec; 583 CeedInt num_active_in = impl->num_active_in, 584 num_active_out = impl->num_active_out; 585 CeedVector *active_in = impl->qf_active_in; 586 CeedScalar *a, *tmp; 587 CeedScalar *e_data[2*CEED_FIELD_MAX] = {0}; 588 589 // Setup 590 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 591 592 // Check for identity 593 if (impl->is_identity_qf) 594 // LCOV_EXCL_START 595 return CeedError(ceed, CEED_ERROR_BACKEND, 596 "Assembling identity qfunctions not supported"); 597 // LCOV_EXCL_STOP 598 599 // Input Evecs and Restriction 600 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 601 op_input_fields, NULL, e_data, 602 impl, request); CeedChkBackend(ierr); 603 604 // Count number of active input fields 605 if (!num_active_in) { 606 for (CeedInt i=0; i<num_input_fields; i++) { 607 // Get input vector 608 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 609 CeedChkBackend(ierr); 610 // Check if active input 611 if (vec == CEED_VECTOR_ACTIVE) { 612 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 613 CeedChkBackend(ierr); 614 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 615 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 616 CeedChkBackend(ierr); 617 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 618 for (CeedInt field=0; field<size; field++) { 619 ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]); 620 CeedChkBackend(ierr); 621 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 622 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 623 CeedChkBackend(ierr); 624 } 625 num_active_in += size; 626 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 627 } 628 } 629 impl->num_active_in = num_active_in; 630 impl->qf_active_in = active_in; 631 } 632 633 // Count number of active output fields 634 if (!num_active_out) { 635 for (CeedInt i=0; i<num_output_fields; i++) { 636 // Get output vector 637 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 638 CeedChkBackend(ierr); 639 // Check if active output 640 if (vec == CEED_VECTOR_ACTIVE) { 641 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 642 CeedChkBackend(ierr); 643 num_active_out += size; 644 } 645 } 646 impl->num_active_out = num_active_out; 647 } 648 649 // Check sizes 650 if (!num_active_in || !num_active_out) 651 // LCOV_EXCL_START 652 return CeedError(ceed, CEED_ERROR_BACKEND, 653 "Cannot assemble QFunction without active inputs " 654 "and outputs"); 655 // LCOV_EXCL_STOP 656 657 // Setup l_vec 658 if (!l_vec) { 659 ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, 660 &l_vec); CeedChkBackend(ierr); 661 impl->qf_l_vec = l_vec; 662 } 663 ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 664 665 // Build objects if needed 666 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 667 if (build_objects) { 668 // Create output restriction 669 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 670 num_active_in*num_active_out, 671 num_active_in*num_active_out*num_elem*Q, 672 strides, rstr); CeedChkBackend(ierr); 673 // Create assembled vector 674 ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out, 675 assembled); CeedChkBackend(ierr); 676 } 677 678 // Loop through elements 679 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 680 // Input basis apply 681 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 682 num_input_fields, blk_size, NULL, true, 683 e_data, impl, request); CeedChkBackend(ierr); 684 685 // Assemble QFunction 686 for (CeedInt in=0; in<num_active_in; in++) { 687 // Set Inputs 688 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 689 if (num_active_in > 1) { 690 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 691 0.0); CeedChkBackend(ierr); 692 } 693 // Set Outputs 694 for (CeedInt out=0; out<num_output_fields; out++) { 695 // Get output vector 696 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 697 CeedChkBackend(ierr); 698 // Check if active output 699 if (vec == CEED_VECTOR_ACTIVE) { 700 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 701 CEED_USE_POINTER, a); CeedChkBackend(ierr); 702 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 703 CeedChkBackend(ierr); 704 a += size*Q*blk_size; // Advance the pointer by the size of the output 705 } 706 } 707 // Apply QFunction 708 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 709 CeedChkBackend(ierr); 710 } 711 } 712 713 // Un-set output Qvecs to prevent accidental overwrite of Assembled 714 for (CeedInt out=0; out<num_output_fields; out++) { 715 // Get output vector 716 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 717 CeedChkBackend(ierr); 718 // Check if active output 719 if (vec == CEED_VECTOR_ACTIVE) { 720 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 721 NULL); CeedChkBackend(ierr); 722 } 723 } 724 725 // Restore input arrays 726 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 727 op_input_fields, e_data, impl); 728 CeedChkBackend(ierr); 729 730 // Output blocked restriction 731 ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr); 732 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 733 CeedElemRestriction blk_rstr = impl->qf_blk_rstr; 734 if (!blk_rstr) { 735 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 736 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 737 strides, &blk_rstr); CeedChkBackend(ierr); 738 impl->qf_blk_rstr = blk_rstr; 739 } 740 ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled, 741 request); CeedChkBackend(ierr); 742 743 return CEED_ERROR_SUCCESS; 744 } 745 746 //------------------------------------------------------------------------------ 747 // Assemble Linear QFunction 748 //------------------------------------------------------------------------------ 749 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op, 750 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 751 return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr, 752 request); 753 } 754 755 //------------------------------------------------------------------------------ 756 // Update Assembled Linear QFunction 757 //------------------------------------------------------------------------------ 758 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op, 759 CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 760 return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled, 761 &rstr, request); 762 } 763 764 //------------------------------------------------------------------------------ 765 // Operator Destroy 766 //------------------------------------------------------------------------------ 767 static int CeedOperatorDestroy_Opt(CeedOperator op) { 768 int ierr; 769 CeedOperator_Opt *impl; 770 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 771 772 for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) { 773 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 774 ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr); 775 } 776 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 777 ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr); 778 ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr); 779 780 for (CeedInt i=0; i<impl->num_inputs; i++) { 781 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 782 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 783 } 784 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 785 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 786 787 for (CeedInt i=0; i<impl->num_outputs; i++) { 788 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 789 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 790 } 791 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 792 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 793 794 // QFunction assembly data 795 for (CeedInt i=0; i<impl->num_active_in; i++) { 796 ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr); 797 } 798 ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr); 799 ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr); 800 ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr); 801 802 ierr = CeedFree(&impl); CeedChkBackend(ierr); 803 return CEED_ERROR_SUCCESS; 804 } 805 806 //------------------------------------------------------------------------------ 807 // Operator Create 808 //------------------------------------------------------------------------------ 809 int CeedOperatorCreate_Opt(CeedOperator op) { 810 int ierr; 811 Ceed ceed; 812 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 813 Ceed_Opt *ceed_impl; 814 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 815 CeedInt blk_size = ceed_impl->blk_size; 816 CeedOperator_Opt *impl; 817 818 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 819 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 820 821 if (blk_size != 1 && blk_size != 8) 822 // LCOV_EXCL_START 823 return CeedError(ceed, CEED_ERROR_BACKEND, 824 "Opt backend cannot use blocksize: %d", blk_size); 825 // LCOV_EXCL_STOP 826 827 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 828 CeedOperatorLinearAssembleQFunction_Opt); 829 CeedChkBackend(ierr); 830 ierr = CeedSetBackendFunction(ceed, "Operator", op, 831 "LinearAssembleQFunctionUpdate", 832 CeedOperatorLinearAssembleQFunctionUpdate_Opt); 833 CeedChkBackend(ierr); 834 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 835 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr); 836 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 837 CeedOperatorDestroy_Opt); CeedChkBackend(ierr); 838 return CEED_ERROR_SUCCESS; 839 } 840 //------------------------------------------------------------------------------ 841