1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stdint.h> 21 #include <string.h> 22 #include "ceed-opt.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, 28 bool inOrOut, const CeedInt blk_size, 29 CeedElemRestriction *blk_restr, 30 CeedVector *full_evecs, CeedVector *e_vecs, 31 CeedVector *q_vecs, CeedInt start_e, 32 CeedInt num_fields, CeedInt Q) { 33 CeedInt dim, ierr, num_comp, size, P; 34 Ceed ceed; 35 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 36 CeedBasis basis; 37 CeedElemRestriction r; 38 CeedOperatorField *op_fields; 39 CeedQFunctionField *qf_fields; 40 if (inOrOut) { 41 ierr = CeedOperatorGetFields(op, NULL, &op_fields); 42 CeedChkBackend(ierr); 43 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); 44 CeedChkBackend(ierr); 45 } else { 46 ierr = CeedOperatorGetFields(op, &op_fields, NULL); 47 CeedChkBackend(ierr); 48 ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); 49 CeedChkBackend(ierr); 50 } 51 52 // Loop over fields 53 for (CeedInt i=0; i<num_fields; i++) { 54 CeedEvalMode eval_mode; 55 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 56 CeedChkBackend(ierr); 57 58 if (eval_mode != CEED_EVAL_WEIGHT) { 59 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 60 CeedChkBackend(ierr); 61 Ceed ceed; 62 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 63 CeedInt num_elem, elem_size, l_size, comp_stride; 64 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 65 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 66 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 67 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 68 69 bool strided; 70 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 71 if (strided) { 72 CeedInt strides[3]; 73 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 74 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 75 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 76 CeedChkBackend(ierr); 77 } else { 78 const CeedInt *offsets = NULL; 79 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 80 CeedChkBackend(ierr); 81 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 82 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 83 blk_size, num_comp, comp_stride, 84 l_size, CEED_MEM_HOST, 85 CEED_COPY_VALUES, offsets, 86 &blk_restr[i+start_e]); 87 CeedChkBackend(ierr); 88 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 89 } 90 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 91 &full_evecs[i+start_e]); 92 CeedChkBackend(ierr); 93 } 94 95 switch(eval_mode) { 96 case CEED_EVAL_NONE: 97 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 98 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]); 99 CeedChkBackend(ierr); 100 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 101 CeedChkBackend(ierr); 102 break; 103 case CEED_EVAL_INTERP: 104 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 105 ierr = CeedElemRestrictionGetElementSize(r, &P); 106 CeedChkBackend(ierr); 107 ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]); 108 CeedChkBackend(ierr); 109 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 110 CeedChkBackend(ierr); 111 break; 112 case CEED_EVAL_GRAD: 113 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 114 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 115 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 116 ierr = CeedElemRestrictionGetElementSize(r, &P); 117 CeedChkBackend(ierr); 118 ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]); 119 CeedChkBackend(ierr); 120 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 121 CeedChkBackend(ierr); 122 break; 123 case CEED_EVAL_WEIGHT: // Only on input fields 124 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 125 ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr); 126 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 127 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 128 CeedChkBackend(ierr); 129 130 break; 131 case CEED_EVAL_DIV: 132 break; // Not implemented 133 case CEED_EVAL_CURL: 134 break; // Not implemented 135 } 136 } 137 return CEED_ERROR_SUCCESS; 138 } 139 140 //------------------------------------------------------------------------------ 141 // Setup Operator 142 //------------------------------------------------------------------------------ 143 static int CeedOperatorSetup_Opt(CeedOperator op) { 144 int ierr; 145 bool setup_done; 146 ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr); 147 if (setup_done) return CEED_ERROR_SUCCESS; 148 Ceed ceed; 149 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 150 Ceed_Opt *ceed_impl; 151 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 152 const CeedInt blk_size = ceed_impl->blk_size; 153 CeedOperator_Opt *impl; 154 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 155 CeedQFunction qf; 156 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 157 CeedInt Q, num_input_fields, num_output_fields; 158 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 159 ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr); 160 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 161 CeedChkBackend(ierr); 162 CeedOperatorField *op_input_fields, *op_output_fields; 163 ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields); 164 CeedChkBackend(ierr); 165 CeedQFunctionField *qf_input_fields, *qf_output_fields; 166 ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields); 167 CeedChkBackend(ierr); 168 169 // Allocate 170 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 171 CeedChkBackend(ierr); 172 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs); 173 CeedChkBackend(ierr); 174 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data); 175 CeedChkBackend(ierr); 176 177 ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr); 178 ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr); 179 ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr); 180 ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr); 181 ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr); 182 183 impl->num_e_vecs_in = num_input_fields; 184 impl->num_e_vecs_out = num_output_fields; 185 186 // Set up infield and outfield pointer arrays 187 // Infields 188 ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blk_size, impl->blk_restr, 189 impl->e_vecs, impl->e_vecs_in, 190 impl->q_vecs_in, 0, num_input_fields, Q); 191 CeedChkBackend(ierr); 192 // Outfields 193 ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blk_size, impl->blk_restr, 194 impl->e_vecs, impl->e_vecs_out, 195 impl->q_vecs_out, num_input_fields, 196 num_output_fields, Q); 197 CeedChkBackend(ierr); 198 199 // Identity QFunctions 200 if (impl->is_identity_qf) { 201 CeedEvalMode in_mode, out_mode; 202 CeedQFunctionField *in_fields, *out_fields; 203 ierr = CeedQFunctionGetFields(qf, &in_fields, &out_fields); 204 CeedChkBackend(ierr); 205 ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode); 206 CeedChkBackend(ierr); 207 ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode); 208 CeedChkBackend(ierr); 209 210 if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) { 211 impl->is_identity_restr_op = true; 212 } else { 213 ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr); 214 impl->q_vecs_out[0] = impl->q_vecs_in[0]; 215 ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr); 216 } 217 } 218 219 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 220 221 return CEED_ERROR_SUCCESS; 222 } 223 224 //------------------------------------------------------------------------------ 225 // Setup Input Fields 226 //------------------------------------------------------------------------------ 227 static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, 228 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 229 CeedVector in_vec, CeedOperator_Opt *impl, CeedRequest *request) { 230 CeedInt ierr; 231 CeedEvalMode eval_mode; 232 CeedVector vec; 233 uint64_t state; 234 235 for (CeedInt i=0; i<num_input_fields; i++) { 236 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 237 CeedChkBackend(ierr); 238 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 239 } else { 240 // Get input vector 241 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 242 CeedChkBackend(ierr); 243 if (vec != CEED_VECTOR_ACTIVE) { 244 // Restrict 245 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 246 if (state != impl->input_state[i]) { 247 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 248 vec, impl->e_vecs[i], request); 249 CeedChkBackend(ierr); 250 impl->input_state[i] = state; 251 } 252 } else { 253 // Set Qvec for CEED_EVAL_NONE 254 if (eval_mode == CEED_EVAL_NONE) { 255 ierr = CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 256 &impl->e_data[i]); CeedChkBackend(ierr); 257 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 258 CEED_USE_POINTER, 259 impl->e_data[i]); CeedChkBackend(ierr); 260 ierr = CeedVectorRestoreArray(impl->e_vecs_in[i], 261 &impl->e_data[i]); CeedChkBackend(ierr); 262 } 263 } 264 // Get evec 265 ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST, 266 (const CeedScalar **) &impl->e_data[i]); 267 CeedChkBackend(ierr); 268 } 269 } 270 return CEED_ERROR_SUCCESS; 271 } 272 273 //------------------------------------------------------------------------------ 274 // Input Basis Action 275 //------------------------------------------------------------------------------ 276 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, 277 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 278 CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active, 279 CeedOperator_Opt *impl, CeedRequest *request) { 280 CeedInt ierr; 281 CeedInt dim, elem_size, size; 282 CeedElemRestriction elem_restr; 283 CeedEvalMode eval_mode; 284 CeedBasis basis; 285 CeedVector vec; 286 287 for (CeedInt i=0; i<num_input_fields; i++) { 288 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 289 CeedChkBackend(ierr); 290 // Skip active input 291 if (skip_active) { 292 if (vec == CEED_VECTOR_ACTIVE) 293 continue; 294 } 295 296 CeedInt active_in = 0; 297 // Get elem_size, eval_mode, size 298 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 299 CeedChkBackend(ierr); 300 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 301 CeedChkBackend(ierr); 302 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 303 CeedChkBackend(ierr); 304 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 305 CeedChkBackend(ierr); 306 // Restrict block active input 307 if (vec == CEED_VECTOR_ACTIVE) { 308 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size, 309 CEED_NOTRANSPOSE, in_vec, 310 impl->e_vecs_in[i], request); 311 CeedChkBackend(ierr); 312 active_in = 1; 313 } 314 // Basis action 315 switch(eval_mode) { 316 case CEED_EVAL_NONE: 317 if (!active_in) { 318 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 319 CEED_USE_POINTER, 320 &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr); 321 } 322 break; 323 case CEED_EVAL_INTERP: 324 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 325 CeedChkBackend(ierr); 326 if (!active_in) { 327 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 328 CEED_USE_POINTER, 329 &impl->e_data[i][e*elem_size*size]); 330 CeedChkBackend(ierr); 331 } 332 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 333 CEED_EVAL_INTERP, impl->e_vecs_in[i], 334 impl->q_vecs_in[i]); CeedChkBackend(ierr); 335 break; 336 case CEED_EVAL_GRAD: 337 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 338 CeedChkBackend(ierr); 339 if (!active_in) { 340 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 341 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 342 CEED_USE_POINTER, 343 &impl->e_data[i][e*elem_size*size/dim]); 344 CeedChkBackend(ierr); 345 } 346 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 347 CEED_EVAL_GRAD, impl->e_vecs_in[i], 348 impl->q_vecs_in[i]); CeedChkBackend(ierr); 349 break; 350 case CEED_EVAL_WEIGHT: 351 break; // No action 352 // LCOV_EXCL_START 353 case CEED_EVAL_DIV: 354 case CEED_EVAL_CURL: { 355 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 356 CeedChkBackend(ierr); 357 Ceed ceed; 358 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 359 return CeedError(ceed, CEED_ERROR_BACKEND, 360 "Ceed evaluation mode not implemented"); 361 // LCOV_EXCL_STOP 362 } 363 } 364 } 365 return CEED_ERROR_SUCCESS; 366 } 367 368 //------------------------------------------------------------------------------ 369 // Output Basis Action 370 //------------------------------------------------------------------------------ 371 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, 372 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 373 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 374 CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl, 375 CeedRequest *request) { 376 CeedInt ierr; 377 CeedElemRestriction elem_restr; 378 CeedEvalMode eval_mode; 379 CeedBasis basis; 380 CeedVector vec; 381 382 for (CeedInt i=0; i<num_output_fields; i++) { 383 // Get elem_size, eval_mode, size 384 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 385 CeedChkBackend(ierr); 386 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 387 CeedChkBackend(ierr); 388 // Basis action 389 switch(eval_mode) { 390 case CEED_EVAL_NONE: 391 break; // No action 392 case CEED_EVAL_INTERP: 393 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 394 CeedChkBackend(ierr); 395 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 396 CEED_EVAL_INTERP, impl->q_vecs_out[i], 397 impl->e_vecs_out[i]); CeedChkBackend(ierr); 398 break; 399 case CEED_EVAL_GRAD: 400 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 401 CeedChkBackend(ierr); 402 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 403 CEED_EVAL_GRAD, impl->q_vecs_out[i], 404 impl->e_vecs_out[i]); CeedChkBackend(ierr); 405 break; 406 // LCOV_EXCL_START 407 case CEED_EVAL_WEIGHT: { 408 Ceed ceed; 409 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 410 return CeedError(ceed, CEED_ERROR_BACKEND, 411 "CEED_EVAL_WEIGHT cannot be an output " 412 "evaluation mode"); 413 } 414 case CEED_EVAL_DIV: 415 case CEED_EVAL_CURL: { 416 Ceed ceed; 417 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 418 return CeedError(ceed, CEED_ERROR_BACKEND, 419 "Ceed evaluation mode not implemented"); 420 // LCOV_EXCL_STOP 421 } 422 } 423 // Restrict output block 424 // Get output vector 425 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 426 CeedChkBackend(ierr); 427 if (vec == CEED_VECTOR_ACTIVE) 428 vec = out_vec; 429 // Restrict 430 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_e_vecs_in], 431 e/blk_size, CEED_TRANSPOSE, 432 impl->e_vecs_out[i], vec, request); 433 CeedChkBackend(ierr); 434 } 435 return CEED_ERROR_SUCCESS; 436 } 437 438 //------------------------------------------------------------------------------ 439 // Restore Input Vectors 440 //------------------------------------------------------------------------------ 441 static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields, 442 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 443 CeedOperator_Opt *impl) { 444 CeedInt ierr; 445 CeedEvalMode eval_mode; 446 447 for (CeedInt i=0; i<num_input_fields; i++) { 448 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 449 CeedChkBackend(ierr); 450 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 451 } else { 452 ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i], 453 (const CeedScalar **) &impl->e_data[i]); 454 CeedChkBackend(ierr); 455 } 456 } 457 return CEED_ERROR_SUCCESS; 458 } 459 460 //------------------------------------------------------------------------------ 461 // Operator Apply 462 //------------------------------------------------------------------------------ 463 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec, 464 CeedVector out_vec, CeedRequest *request) { 465 int ierr; 466 Ceed ceed; 467 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 468 Ceed_Opt *ceed_impl; 469 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 470 CeedInt blk_size = ceed_impl->blk_size; 471 CeedOperator_Opt *impl; 472 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 473 CeedInt Q, num_input_fields, num_output_fields, num_elem; 474 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 475 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 476 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 477 CeedQFunction qf; 478 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 479 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 480 CeedChkBackend(ierr); 481 CeedOperatorField *op_input_fields, *op_output_fields; 482 ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields); 483 CeedChkBackend(ierr); 484 CeedQFunctionField *qf_input_fields, *qf_output_fields; 485 ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields); 486 CeedChkBackend(ierr); 487 CeedEvalMode eval_mode; 488 489 // Setup 490 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 491 492 // Restriction only operator 493 if (impl->is_identity_restr_op) { 494 for (CeedInt b=0; b<num_blks; b++) { 495 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[0], b, CEED_NOTRANSPOSE, 496 in_vec, impl->e_vecs_in[0], request); CeedChkBackend(ierr); 497 ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[1], b, CEED_TRANSPOSE, 498 impl->e_vecs_in[0], out_vec, request); CeedChkBackend(ierr); 499 } 500 return CEED_ERROR_SUCCESS; 501 } 502 503 // Input Evecs and Restriction 504 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 505 op_input_fields, in_vec, impl, request); 506 CeedChkBackend(ierr); 507 508 // Output Lvecs, Evecs, and Qvecs 509 for (CeedInt i=0; i<num_output_fields; i++) { 510 // Set Qvec if needed 511 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 512 CeedChkBackend(ierr); 513 if (eval_mode == CEED_EVAL_NONE) { 514 // Set qvec to single block evec 515 ierr = CeedVectorGetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 516 &impl->e_data[i + num_input_fields]); 517 CeedChkBackend(ierr); 518 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 519 CEED_USE_POINTER, impl->e_data[i + num_input_fields]); CeedChkBackend(ierr); 520 ierr = CeedVectorRestoreArray(impl->e_vecs_out[i], 521 &impl->e_data[i + num_input_fields]); 522 CeedChkBackend(ierr); 523 } 524 } 525 526 // Loop through elements 527 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 528 // Input basis apply 529 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 530 num_input_fields, blk_size, in_vec, false, 531 impl, request); CeedChkBackend(ierr); 532 533 // Q function 534 if (!impl->is_identity_qf) { 535 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 536 CeedChkBackend(ierr); 537 } 538 539 // Output basis apply and restrict 540 ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields, 541 blk_size, num_input_fields, num_output_fields, 542 op, out_vec, impl, request); 543 CeedChkBackend(ierr); 544 } 545 546 // Restore input arrays 547 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 548 op_input_fields, impl); 549 CeedChkBackend(ierr); 550 551 return CEED_ERROR_SUCCESS; 552 } 553 554 //------------------------------------------------------------------------------ 555 // Assemble Linear QFunction 556 //------------------------------------------------------------------------------ 557 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op, 558 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 559 int ierr; 560 Ceed ceed; 561 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 562 Ceed_Opt *ceed_impl; 563 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 564 const CeedInt blk_size = ceed_impl->blk_size; 565 CeedOperator_Opt *impl; 566 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 567 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 568 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 569 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 570 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 571 CeedQFunction qf; 572 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 573 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 574 CeedChkBackend(ierr); 575 CeedOperatorField *op_input_fields, *op_output_fields; 576 ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields); 577 CeedChkBackend(ierr); 578 CeedQFunctionField *qf_input_fields, *qf_output_fields; 579 ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields); 580 CeedChkBackend(ierr); 581 CeedVector vec, lvec; 582 CeedInt num_active_in = 0, num_active_out = 0; 583 CeedVector *active_in = NULL; 584 CeedScalar *a, *tmp; 585 586 // Setup 587 ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr); 588 589 // Check for identity 590 if (impl->is_identity_qf) 591 // LCOV_EXCL_START 592 return CeedError(ceed, CEED_ERROR_BACKEND, 593 "Assembling identity qfunctions not supported"); 594 // LCOV_EXCL_STOP 595 596 // Input Evecs and Restriction 597 ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields, 598 op_input_fields, NULL, impl, request); 599 CeedChkBackend(ierr); 600 601 // Count number of active input fields 602 for (CeedInt i=0; i<num_input_fields; i++) { 603 // Get input vector 604 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 605 CeedChkBackend(ierr); 606 // Check if active input 607 if (vec == CEED_VECTOR_ACTIVE) { 608 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 609 CeedChkBackend(ierr); 610 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 611 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 612 CeedChkBackend(ierr); 613 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 614 for (CeedInt field=0; field<size; field++) { 615 ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]); 616 CeedChkBackend(ierr); 617 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 618 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 619 CeedChkBackend(ierr); 620 } 621 num_active_in += size; 622 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 623 } 624 } 625 626 // Count number of active output fields 627 for (CeedInt i=0; i<num_output_fields; i++) { 628 // Get output vector 629 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 630 CeedChkBackend(ierr); 631 // Check if active output 632 if (vec == CEED_VECTOR_ACTIVE) { 633 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 634 CeedChkBackend(ierr); 635 num_active_out += size; 636 } 637 } 638 639 // Check sizes 640 if (!num_active_in || !num_active_out) 641 // LCOV_EXCL_START 642 return CeedError(ceed, CEED_ERROR_BACKEND, 643 "Cannot assemble QFunction without active inputs " 644 "and outputs"); 645 // LCOV_EXCL_STOP 646 647 // Setup lvec 648 ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, 649 &lvec); CeedChkBackend(ierr); 650 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 651 652 // Create output restriction 653 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 654 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 655 num_active_in*num_active_out, 656 num_active_in*num_active_out*num_elem*Q, 657 strides, rstr); CeedChkBackend(ierr); 658 // Create assembled vector 659 ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out, 660 assembled); CeedChkBackend(ierr); 661 662 // Loop through elements 663 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 664 // Input basis apply 665 ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields, 666 num_input_fields, blk_size, NULL, true, 667 impl, request); CeedChkBackend(ierr); 668 669 // Assemble QFunction 670 for (CeedInt in=0; in<num_active_in; in++) { 671 // Set Inputs 672 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 673 if (num_active_in > 1) { 674 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 675 0.0); CeedChkBackend(ierr); 676 } 677 // Set Outputs 678 for (CeedInt out=0; out<num_output_fields; out++) { 679 // Get output vector 680 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 681 CeedChkBackend(ierr); 682 // Check if active output 683 if (vec == CEED_VECTOR_ACTIVE) { 684 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 685 CEED_USE_POINTER, a); CeedChkBackend(ierr); 686 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 687 CeedChkBackend(ierr); 688 a += size*Q*blk_size; // Advance the pointer by the size of the output 689 } 690 } 691 // Apply QFunction 692 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 693 CeedChkBackend(ierr); 694 } 695 } 696 697 // Un-set output Qvecs to prevent accidental overwrite of Assembled 698 for (CeedInt out=0; out<num_output_fields; out++) { 699 // Get output vector 700 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 701 CeedChkBackend(ierr); 702 // Check if active output 703 if (vec == CEED_VECTOR_ACTIVE) { 704 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 705 NULL); CeedChkBackend(ierr); 706 } 707 } 708 709 // Restore input arrays 710 ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields, 711 op_input_fields, impl); 712 CeedChkBackend(ierr); 713 714 // Output blocked restriction 715 ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr); 716 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 717 CeedElemRestriction blk_rstr; 718 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 719 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 720 strides, &blk_rstr); CeedChkBackend(ierr); 721 ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled, 722 request); CeedChkBackend(ierr); 723 724 // Cleanup 725 for (CeedInt i=0; i<num_active_in; i++) { 726 ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr); 727 } 728 ierr = CeedFree(&active_in); CeedChkBackend(ierr); 729 ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr); 730 ierr = CeedElemRestrictionDestroy(&blk_rstr); CeedChkBackend(ierr); 731 732 return CEED_ERROR_SUCCESS; 733 } 734 735 //------------------------------------------------------------------------------ 736 // Operator Destroy 737 //------------------------------------------------------------------------------ 738 static int CeedOperatorDestroy_Opt(CeedOperator op) { 739 int ierr; 740 CeedOperator_Opt *impl; 741 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 742 743 for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) { 744 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 745 ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr); 746 } 747 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 748 ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr); 749 ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr); 750 ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr); 751 752 for (CeedInt i=0; i<impl->num_e_vecs_in; i++) { 753 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 754 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 755 } 756 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 757 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 758 759 for (CeedInt i=0; i<impl->num_e_vecs_out; i++) { 760 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 761 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 762 } 763 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 764 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 765 766 ierr = CeedFree(&impl); CeedChkBackend(ierr); 767 return CEED_ERROR_SUCCESS; 768 } 769 770 //------------------------------------------------------------------------------ 771 // Operator Create 772 //------------------------------------------------------------------------------ 773 int CeedOperatorCreate_Opt(CeedOperator op) { 774 int ierr; 775 Ceed ceed; 776 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 777 Ceed_Opt *ceed_impl; 778 ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr); 779 CeedInt blk_size = ceed_impl->blk_size; 780 CeedOperator_Opt *impl; 781 782 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 783 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 784 785 if (blk_size != 1 && blk_size != 8) 786 // LCOV_EXCL_START 787 return CeedError(ceed, CEED_ERROR_BACKEND, 788 "Opt backend cannot use blocksize: %d", blk_size); 789 // LCOV_EXCL_STOP 790 791 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 792 CeedOperatorLinearAssembleQFunction_Opt); 793 CeedChkBackend(ierr); 794 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 795 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr); 796 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 797 CeedOperatorDestroy_Opt); CeedChkBackend(ierr); 798 return CEED_ERROR_SUCCESS; 799 } 800 //------------------------------------------------------------------------------ 801