1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stdint.h> 21 #include <string.h> 22 #include "ceed-opt.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 28 bool is_input, const CeedInt blk_size, 29 CeedElemRestriction *blk_restr, 30 CeedVector *e_vecs_full, CeedVector *e_vecs, 31 CeedVector *q_vecs, CeedInt start_e, 32 CeedInt num_fields, CeedInt Q) { 33 CeedInt dim, ierr, num_comp, size, P; 34 Ceed ceed; 35 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 36 CeedBasis basis; 37 CeedElemRestriction r; 38 CeedOperatorField *op_fields; 39 CeedQFunctionField *qf_fields; 40 if (is_input) { 41 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); 42 CeedChkBackend(ierr); 43 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); 44 CeedChkBackend(ierr); 45 } else { 46 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL,&op_fields); 47 CeedChkBackend(ierr); 48 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 49 CeedChkBackend(ierr); 50 } 51 52 // Loop over fields 53 for (CeedInt i=0; i<num_fields; i++) { 54 CeedEvalMode eval_mode; 55 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 56 CeedChkBackend(ierr); 57 58 if (eval_mode != CEED_EVAL_WEIGHT) { 59 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 60 CeedChkBackend(ierr); 61 Ceed ceed; 62 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 63 CeedInt num_elem, elem_size, l_size, comp_stride; 64 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 65 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 66 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 67 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 68 69 bool strided; 70 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 71 if (strided) { 72 CeedInt strides[3]; 73 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 74 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 75 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 76 CeedChkBackend(ierr); 77 } else { 78 const CeedInt *offsets = NULL; 79 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 80 CeedChkBackend(ierr); 81 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 82 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 83 blk_size, num_comp, comp_stride, 84 l_size, CEED_MEM_HOST, 85 CEED_COPY_VALUES, offsets, 86 &blk_restr[i+start_e]); 87 CeedChkBackend(ierr); 88 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 89 } 90 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 91 &e_vecs_full[i+start_e]); 92 CeedChkBackend(ierr); 93 } 94 95 switch(eval_mode) { 96 case CEED_EVAL_NONE: 97 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 98 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]); 99 CeedChkBackend(ierr); 100 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 101 CeedChkBackend(ierr); 102 break; 103 case CEED_EVAL_INTERP: 104 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 105 ierr = CeedElemRestrictionGetElementSize(r, &P); 106 CeedChkBackend(ierr); 107 ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]); 108 CeedChkBackend(ierr); 109 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 110 CeedChkBackend(ierr); 111 break; 112 case CEED_EVAL_GRAD: 113 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 114 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 115 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 116 ierr = CeedElemRestrictionGetElementSize(r, &P); 117 CeedChkBackend(ierr); 118 ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]); 119 CeedChkBackend(ierr); 120 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 121 CeedChkBackend(ierr); 122 break; 123 case CEED_EVAL_WEIGHT: // Only on input fields 124 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 125 ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr); 126 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 127 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 128 CeedChkBackend(ierr); 129 130 break; 131 case CEED_EVAL_DIV: 132 break; // Not implemented 133 case CEED_EVAL_CURL: 134 break; // Not implemented 135 } 136 if (is_input && !!e_vecs[i]) { 137 ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST, 138 CEED_COPY_VALUES, NULL); CeedChkBackend(ierr); 139 } 140 } 141 return CEED_ERROR_SUCCESS; 142 } 143 144 //------------------------------------------------------------------------------ 145 // Setup Operator 146 //------------------------------------------------------------------------------ 147 static int CeedOperatorSetup_Opt(CeedOperator op) { 148 int ierr; 149 bool is_setup_done; 150 ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr); 151 if (is_setup_done) return CEED_ERROR_SUCCESS; 152 Ceed ceed; 153 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 154 Ceed_Opt *ceed_impl; 155 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 156 const CeedInt blk_size = ceed_impl->blk_size; 157 CeedOperator_Opt *impl; 158 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 159 CeedQFunction qf; 160 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 161 CeedInt Q, num_input_fields, num_output_fields; 162 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 163 ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr); 164 CeedOperatorField *op_input_fields, *op_output_fields; 165 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 166 &num_output_fields, &op_output_fields); 167 CeedChkBackend(ierr); 168 CeedQFunctionField *qf_input_fields, *qf_output_fields; 169 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 170 &qf_output_fields); 171 CeedChkBackend(ierr); 172 173 // Allocate 174 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 175 CeedChkBackend(ierr); 176 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full); 177 CeedChkBackend(ierr); 178 179 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr); 180 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr); 181 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr); 182 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr); 183 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr); 184 185 impl->num_inputs = num_input_fields; 186 impl->num_outputs = num_output_fields; 187 188 // Set up infield and outfield pointer arrays 189 // Infields 190 ierr = CeedOperatorSetupFields_Opt(qf, op, true, blk_size, impl->blk_restr, 191 impl->e_vecs_full, impl->e_vecs_in, 192 impl->q_vecs_in, 0, num_input_fields, Q); 193 CeedChkBackend(ierr); 194 // Outfields 195 ierr = CeedOperatorSetupFields_Opt(qf, op, false, blk_size, impl->blk_restr, 196 impl->e_vecs_full, impl->e_vecs_out, 197 impl->q_vecs_out, num_input_fields, 198 num_output_fields, Q); 199 CeedChkBackend(ierr); 200 201 // Identity QFunctions 202 if (impl->is_identity_qf) { 203 CeedEvalMode in_mode, out_mode; 204 CeedQFunctionField *in_fields, *out_fields; 205 ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields); 206 CeedChkBackend(ierr); 207 ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode); 208 CeedChkBackend(ierr); 209 ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode); 210 CeedChkBackend(ierr); 211 212 if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) { 213 impl->is_identity_restr_op = true; 214 } else { 215 ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr); 216 impl->q_vecs_out[0] = impl->q_vecs_in[0]; 217 ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr); 218 } 219 } 220 221 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 222 223 return CEED_ERROR_SUCCESS; 224 } 225 226 //------------------------------------------------------------------------------ 227 // Setup Input Fields 228 //------------------------------------------------------------------------------ 229 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, 230 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 231 CeedVector in_vec, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl, 232 CeedRequest *request) { 233 CeedInt ierr; 234 CeedEvalMode eval_mode; 235 CeedVector vec; 236 uint64_t state; 237 238 for (CeedInt i=0; i<num_input_fields; i++) { 239 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 240 CeedChkBackend(ierr); 241 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 242 } else { 243 // Get input vector 244 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 245 CeedChkBackend(ierr); 246 if (vec != CEED_VECTOR_ACTIVE) { 247 // Restrict 248 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 249 if (state != impl->input_states[i]) { 250 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 251 vec, impl->e_vecs_full[i], request); 252 CeedChkBackend(ierr); 253 impl->input_states[i] = state; 254 } 255 // Get evec 256 ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, 257 (const CeedScalar **) &e_data[i]); 258 CeedChkBackend(ierr); 259 } else { 260 // Set Qvec for CEED_EVAL_NONE 261 if (eval_mode == CEED_EVAL_NONE) { 262 ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST, 263 (const CeedScalar **)&e_data[i]); 264 CeedChkBackend(ierr); 265 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 266 CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr); 267 ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i], 268 (const CeedScalar **)&e_data[i]); 269 CeedChkBackend(ierr); 270 } 271 } 272 } 273 } 274 return CEED_ERROR_SUCCESS; 275 } 276 277 //------------------------------------------------------------------------------ 278 // Input Basis Action 279 //------------------------------------------------------------------------------ 280 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 281 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 282 CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active, 283 CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl, 284 CeedRequest *request) { 285 CeedInt ierr; 286 CeedInt dim, elem_size, size; 287 CeedElemRestriction elem_restr; 288 CeedEvalMode eval_mode; 289 CeedBasis basis; 290 CeedVector vec; 291 292 for (CeedInt i=0; i<num_input_fields; i++) { 293 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 294 CeedChkBackend(ierr); 295 // Skip active input 296 if (skip_active) { 297 if (vec == CEED_VECTOR_ACTIVE) 298 continue; 299 } 300 301 CeedInt active_in = 0; 302 // Get elem_size, eval_mode, size 303 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 304 CeedChkBackend(ierr); 305 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 306 CeedChkBackend(ierr); 307 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 308 CeedChkBackend(ierr); 309 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 310 CeedChkBackend(ierr); 311 // Restrict block active input 312 if (vec == CEED_VECTOR_ACTIVE) { 313 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size, 314 CEED_NOTRANSPOSE, in_vec, 315 impl->e_vecs_in[i], request); 316 CeedChkBackend(ierr); 317 active_in = 1; 318 } 319 // Basis action 320 switch(eval_mode) { 321 case CEED_EVAL_NONE: 322 if (!active_in) { 323 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 324 CEED_USE_POINTER, &e_data[i][e*Q*size]); 325 CeedChkBackend(ierr); 326 } 327 break; 328 case CEED_EVAL_INTERP: 329 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 330 CeedChkBackend(ierr); 331 if (!active_in) { 332 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 333 CEED_USE_POINTER, &e_data[i][e*elem_size*size]); 334 CeedChkBackend(ierr); 335 } 336 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 337 CEED_EVAL_INTERP, impl->e_vecs_in[i], 338 impl->q_vecs_in[i]); CeedChkBackend(ierr); 339 break; 340 case CEED_EVAL_GRAD: 341 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 342 CeedChkBackend(ierr); 343 if (!active_in) { 344 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 345 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 346 CEED_USE_POINTER, 347 &e_data[i][e*elem_size*size/dim]); 348 CeedChkBackend(ierr); 349 } 350 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 351 CEED_EVAL_GRAD, impl->e_vecs_in[i], 352 impl->q_vecs_in[i]); CeedChkBackend(ierr); 353 break; 354 case CEED_EVAL_WEIGHT: 355 break; // No action 356 // LCOV_EXCL_START 357 case CEED_EVAL_DIV: 358 case CEED_EVAL_CURL: { 359 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 360 CeedChkBackend(ierr); 361 Ceed ceed; 362 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 363 return CeedError(ceed, CEED_ERROR_BACKEND, 364 "Ceed evaluation mode not implemented"); 365 // LCOV_EXCL_STOP 366 } 367 } 368 } 369 return CEED_ERROR_SUCCESS; 370 } 371 372 //------------------------------------------------------------------------------ 373 // Output Basis Action 374 //------------------------------------------------------------------------------ 375 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 376 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 377 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 378 CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl, 379 CeedRequest *request) { 380 CeedInt ierr; 381 CeedElemRestriction elem_restr; 382 CeedEvalMode eval_mode; 383 CeedBasis basis; 384 CeedVector vec; 385 386 for (CeedInt i=0; i<num_output_fields; i++) { 387 // Get elem_size, eval_mode, size 388 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 389 CeedChkBackend(ierr); 390 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 391 CeedChkBackend(ierr); 392 // Basis action 393 switch(eval_mode) { 394 case CEED_EVAL_NONE: 395 break; // No action 396 case CEED_EVAL_INTERP: 397 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 398 CeedChkBackend(ierr); 399 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 400 CEED_EVAL_INTERP, impl->q_vecs_out[i], 401 impl->e_vecs_out[i]); CeedChkBackend(ierr); 402 break; 403 case CEED_EVAL_GRAD: 404 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 405 CeedChkBackend(ierr); 406 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 407 CEED_EVAL_GRAD, impl->q_vecs_out[i], 408 impl->e_vecs_out[i]); CeedChkBackend(ierr); 409 break; 410 // LCOV_EXCL_START 411 case CEED_EVAL_WEIGHT: { 412 Ceed ceed; 413 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 414 return CeedError(ceed, CEED_ERROR_BACKEND, 415 "CEED_EVAL_WEIGHT cannot be an output " 416 "evaluation mode"); 417 } 418 case CEED_EVAL_DIV: 419 case CEED_EVAL_CURL: { 420 Ceed ceed; 421 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 422 return CeedError(ceed, CEED_ERROR_BACKEND, 423 "Ceed evaluation mode not implemented"); 424 // LCOV_EXCL_STOP 425 } 426 } 427 // Restrict output block 428 // Get output vector 429 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 430 CeedChkBackend(ierr); 431 if (vec == CEED_VECTOR_ACTIVE) 432 vec = out_vec; 433 // Restrict 434 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_inputs], 435 e/blk_size, CEED_TRANSPOSE, 436 impl->e_vecs_out[i], vec, request); 437 CeedChkBackend(ierr); 438 } 439 return CEED_ERROR_SUCCESS; 440 } 441 442 //------------------------------------------------------------------------------ 443 // Restore Input Vectors 444 //------------------------------------------------------------------------------ 445 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields, 446 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 447 CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) { 448 CeedInt ierr; 449 450 for (CeedInt i=0; i<num_input_fields; i++) { 451 CeedEvalMode eval_mode; 452 CeedVector vec; 453 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 454 CeedChkBackend(ierr); 455 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 456 CeedChkBackend(ierr); 457 if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) { 458 ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i], 459 (const CeedScalar **) &e_data[i]); 460 CeedChkBackend(ierr); 461 } 462 } 463 return CEED_ERROR_SUCCESS; 464 } 465 466 //------------------------------------------------------------------------------ 467 // Operator Apply 468 //------------------------------------------------------------------------------ 469 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec, 470 CeedVector out_vec, CeedRequest *request) { 471 int ierr; 472 Ceed ceed; 473 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 474 Ceed_Opt *ceed_impl; 475 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 476 CeedInt blk_size = ceed_impl->blk_size; 477 CeedOperator_Opt *impl; 478 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 479 CeedInt Q, num_input_fields, num_output_fields, num_elem; 480 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 481 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 482 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 483 CeedQFunction qf; 484 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 485 CeedOperatorField *op_input_fields, *op_output_fields; 486 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 487 &num_output_fields, &op_output_fields); 488 CeedChkBackend(ierr); 489 CeedQFunctionField *qf_input_fields, *qf_output_fields; 490 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 491 &qf_output_fields); 492 CeedChkBackend(ierr); 493 CeedEvalMode eval_mode; 494 CeedScalar *e_data[2*CEED_FIELD_MAX] = {0}; 495 496 // Setup 497 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 498 499 // Restriction only operator 500 if (impl->is_identity_restr_op) { 501 for (CeedInt b=0; b<num_blks; b++) { 502 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE, 503 in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr); 504 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE, 505 impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr); 506 } 507 return CEED_ERROR_SUCCESS; 508 } 509 510 // Input Evecs and Restriction 511 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 512 op_input_fields, in_vec, e_data, 513 impl, request); CeedChkBackend(ierr); 514 515 // Output Lvecs, Evecs, and Qvecs 516 for (CeedInt i=0; i<num_output_fields; i++) { 517 // Set Qvec if needed 518 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 519 CeedChkBackend(ierr); 520 if (eval_mode == CEED_EVAL_NONE) { 521 // Set qvec to single block evec 522 ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST, 523 &e_data[i + num_input_fields]); 524 CeedChkBackend(ierr); 525 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 526 CEED_USE_POINTER, e_data[i + num_input_fields]); 527 CeedChkBackend(ierr); 528 ierr = CeedVectorRestoreArray(impl->e_vecs_out[i], 529 &e_data[i + num_input_fields]); 530 CeedChkBackend(ierr); 531 } 532 } 533 534 // Loop through elements 535 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 536 // Input basis apply 537 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 538 num_input_fields, blk_size, in_vec, false, 539 e_data, impl, request); CeedChkBackend(ierr); 540 541 // Q function 542 if (!impl->is_identity_qf) { 543 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 544 CeedChkBackend(ierr); 545 } 546 547 // Output basis apply and restrict 548 ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields, 549 blk_size, num_input_fields, num_output_fields, 550 op, out_vec, impl, request); 551 CeedChkBackend(ierr); 552 } 553 554 // Restore input arrays 555 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 556 op_input_fields, e_data, impl); 557 CeedChkBackend(ierr); 558 559 return CEED_ERROR_SUCCESS; 560 } 561 562 //------------------------------------------------------------------------------ 563 // Core code for linear QFunction assembly 564 //------------------------------------------------------------------------------ 565 static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, 566 bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, 567 CeedRequest *request) { 568 int ierr; 569 Ceed ceed; 570 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 571 Ceed_Opt *ceed_impl; 572 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 573 const CeedInt blk_size = ceed_impl->blk_size; 574 CeedOperator_Opt *impl; 575 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 576 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 577 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 578 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 579 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 580 CeedQFunction qf; 581 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 582 CeedOperatorField *op_input_fields, *op_output_fields; 583 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 584 &num_output_fields, &op_output_fields); 585 CeedChkBackend(ierr); 586 CeedQFunctionField *qf_input_fields, *qf_output_fields; 587 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 588 &qf_output_fields); 589 CeedChkBackend(ierr); 590 CeedVector vec, l_vec = impl->qf_l_vec; 591 CeedInt num_active_in = impl->num_active_in, 592 num_active_out = impl->num_active_out; 593 CeedVector *active_in = impl->qf_active_in; 594 CeedScalar *a, *tmp; 595 CeedScalar *e_data[2*CEED_FIELD_MAX] = {0}; 596 597 // Setup 598 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 599 600 // Check for identity 601 if (impl->is_identity_qf) 602 // LCOV_EXCL_START 603 return CeedError(ceed, CEED_ERROR_BACKEND, 604 "Assembling identity qfunctions not supported"); 605 // LCOV_EXCL_STOP 606 607 // Input Evecs and Restriction 608 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 609 op_input_fields, NULL, e_data, 610 impl, request); CeedChkBackend(ierr); 611 612 // Count number of active input fields 613 if (!num_active_in) { 614 for (CeedInt i=0; i<num_input_fields; i++) { 615 // Get input vector 616 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 617 CeedChkBackend(ierr); 618 // Check if active input 619 if (vec == CEED_VECTOR_ACTIVE) { 620 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 621 CeedChkBackend(ierr); 622 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 623 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 624 CeedChkBackend(ierr); 625 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 626 for (CeedInt field=0; field<size; field++) { 627 ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]); 628 CeedChkBackend(ierr); 629 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 630 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 631 CeedChkBackend(ierr); 632 } 633 num_active_in += size; 634 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 635 } 636 } 637 impl->num_active_in = num_active_in; 638 impl->qf_active_in = active_in; 639 } 640 641 // Count number of active output fields 642 if (!num_active_out) { 643 for (CeedInt i=0; i<num_output_fields; i++) { 644 // Get output vector 645 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 646 CeedChkBackend(ierr); 647 // Check if active output 648 if (vec == CEED_VECTOR_ACTIVE) { 649 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 650 CeedChkBackend(ierr); 651 num_active_out += size; 652 } 653 } 654 impl->num_active_out = num_active_out; 655 } 656 657 // Check sizes 658 if (!num_active_in || !num_active_out) 659 // LCOV_EXCL_START 660 return CeedError(ceed, CEED_ERROR_BACKEND, 661 "Cannot assemble QFunction without active inputs " 662 "and outputs"); 663 // LCOV_EXCL_STOP 664 665 // Setup l_vec 666 if (!l_vec) { 667 ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, 668 &l_vec); CeedChkBackend(ierr); 669 ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr); 670 impl->qf_l_vec = l_vec; 671 } 672 ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 673 674 // Build objects if needed 675 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 676 if (build_objects) { 677 // Create output restriction 678 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 679 num_active_in*num_active_out, 680 num_active_in*num_active_out*num_elem*Q, 681 strides, rstr); CeedChkBackend(ierr); 682 // Create assembled vector 683 ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out, 684 assembled); CeedChkBackend(ierr); 685 } 686 687 // Loop through elements 688 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 689 // Input basis apply 690 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 691 num_input_fields, blk_size, NULL, true, 692 e_data, impl, request); CeedChkBackend(ierr); 693 694 // Assemble QFunction 695 for (CeedInt in=0; in<num_active_in; in++) { 696 // Set Inputs 697 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 698 if (num_active_in > 1) { 699 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 700 0.0); CeedChkBackend(ierr); 701 } 702 // Set Outputs 703 for (CeedInt out=0; out<num_output_fields; out++) { 704 // Get output vector 705 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 706 CeedChkBackend(ierr); 707 // Check if active output 708 if (vec == CEED_VECTOR_ACTIVE) { 709 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 710 CEED_USE_POINTER, a); CeedChkBackend(ierr); 711 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 712 CeedChkBackend(ierr); 713 a += size*Q*blk_size; // Advance the pointer by the size of the output 714 } 715 } 716 // Apply QFunction 717 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 718 CeedChkBackend(ierr); 719 } 720 } 721 722 // Un-set output Qvecs to prevent accidental overwrite of Assembled 723 for (CeedInt out=0; out<num_output_fields; out++) { 724 // Get output vector 725 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 726 CeedChkBackend(ierr); 727 // Check if active output 728 if (vec == CEED_VECTOR_ACTIVE) { 729 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 730 NULL); CeedChkBackend(ierr); 731 } 732 } 733 734 // Restore input arrays 735 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 736 op_input_fields, e_data, impl); 737 CeedChkBackend(ierr); 738 739 // Output blocked restriction 740 ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr); 741 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 742 CeedElemRestriction blk_rstr = impl->qf_blk_rstr; 743 if (!blk_rstr) { 744 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 745 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 746 strides, &blk_rstr); CeedChkBackend(ierr); 747 impl->qf_blk_rstr = blk_rstr; 748 } 749 ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled, 750 request); CeedChkBackend(ierr); 751 752 return CEED_ERROR_SUCCESS; 753 } 754 755 //------------------------------------------------------------------------------ 756 // Assemble Linear QFunction 757 //------------------------------------------------------------------------------ 758 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op, 759 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 760 return CeedOperatorLinearAssembleQFunctionCore_Opt(op, true, assembled, rstr, 761 request); 762 } 763 764 //------------------------------------------------------------------------------ 765 // Update Assembled Linear QFunction 766 //------------------------------------------------------------------------------ 767 static int CeedOperatorLinearAssembleQFunctionUpdate_Opt(CeedOperator op, 768 CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 769 return CeedOperatorLinearAssembleQFunctionCore_Opt(op, false, &assembled, 770 &rstr, request); 771 } 772 773 //------------------------------------------------------------------------------ 774 // Operator Destroy 775 //------------------------------------------------------------------------------ 776 static int CeedOperatorDestroy_Opt(CeedOperator op) { 777 int ierr; 778 CeedOperator_Opt *impl; 779 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 780 781 for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) { 782 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 783 ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr); 784 } 785 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 786 ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr); 787 ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr); 788 789 for (CeedInt i=0; i<impl->num_inputs; i++) { 790 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 791 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 792 } 793 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 794 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 795 796 for (CeedInt i=0; i<impl->num_outputs; i++) { 797 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 798 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 799 } 800 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 801 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 802 803 // QFunction assembly data 804 for (CeedInt i=0; i<impl->num_active_in; i++) { 805 ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr); 806 } 807 ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr); 808 ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr); 809 ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr); 810 811 ierr = CeedFree(&impl); CeedChkBackend(ierr); 812 return CEED_ERROR_SUCCESS; 813 } 814 815 //------------------------------------------------------------------------------ 816 // Operator Create 817 //------------------------------------------------------------------------------ 818 int CeedOperatorCreate_Opt(CeedOperator op) { 819 int ierr; 820 Ceed ceed; 821 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 822 Ceed_Opt *ceed_impl; 823 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 824 CeedInt blk_size = ceed_impl->blk_size; 825 CeedOperator_Opt *impl; 826 827 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 828 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 829 830 if (blk_size != 1 && blk_size != 8) 831 // LCOV_EXCL_START 832 return CeedError(ceed, CEED_ERROR_BACKEND, 833 "Opt backend cannot use blocksize: %d", blk_size); 834 // LCOV_EXCL_STOP 835 836 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 837 CeedOperatorLinearAssembleQFunction_Opt); 838 CeedChkBackend(ierr); 839 ierr = CeedSetBackendFunction(ceed, "Operator", op, 840 "LinearAssembleQFunctionUpdate", 841 CeedOperatorLinearAssembleQFunctionUpdate_Opt); 842 CeedChkBackend(ierr); 843 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 844 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr); 845 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 846 CeedOperatorDestroy_Opt); CeedChkBackend(ierr); 847 return CEED_ERROR_SUCCESS; 848 } 849 //------------------------------------------------------------------------------ 850