1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stddef.h> 21 #include <stdint.h> 22 #include "ceed-blocked.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 28 CeedOperator op, bool is_input, CeedElemRestriction *blk_restr, 29 CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs, 30 CeedInt start_e, CeedInt num_fields, CeedInt Q) { 31 CeedInt ierr, num_comp, size, P; 32 CeedSize e_size, q_size; 33 Ceed ceed; 34 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 35 CeedBasis basis; 36 CeedElemRestriction r; 37 CeedOperatorField *op_fields; 38 CeedQFunctionField *qf_fields; 39 if (is_input) { 40 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); 41 CeedChkBackend(ierr); 42 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); 43 CeedChkBackend(ierr); 44 } else { 45 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); 46 CeedChkBackend(ierr); 47 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 48 CeedChkBackend(ierr); 49 } 50 const CeedInt blk_size = 8; 51 52 // Loop over fields 53 for (CeedInt i=0; i<num_fields; i++) { 54 CeedEvalMode eval_mode; 55 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 56 CeedChkBackend(ierr); 57 58 if (eval_mode != CEED_EVAL_WEIGHT) { 59 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 60 CeedChkBackend(ierr); 61 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 62 CeedSize l_size; 63 CeedInt num_elem, elem_size, comp_stride; 64 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 65 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 66 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 67 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 68 69 bool strided; 70 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 71 if (strided) { 72 CeedInt strides[3]; 73 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 74 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 75 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 76 CeedChkBackend(ierr); 77 } else { 78 const CeedInt *offsets = NULL; 79 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 80 CeedChkBackend(ierr); 81 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 82 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 83 blk_size, num_comp, comp_stride, 84 l_size, CEED_MEM_HOST, 85 CEED_COPY_VALUES, offsets, 86 &blk_restr[i+start_e]); 87 CeedChkBackend(ierr); 88 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 89 } 90 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 91 &e_vecs_full[i+start_e]); 92 CeedChkBackend(ierr); 93 } 94 95 switch(eval_mode) { 96 case CEED_EVAL_NONE: 97 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 98 q_size = (CeedSize)Q*size*blk_size; 99 ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr); 100 break; 101 case CEED_EVAL_INTERP: 102 case CEED_EVAL_GRAD: 103 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 104 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 105 ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr); 106 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr); 107 e_size = (CeedSize)P*num_comp*blk_size; 108 ierr = CeedVectorCreate(ceed, e_size, &e_vecs[i]); CeedChkBackend(ierr); 109 q_size = (CeedSize)Q*size*blk_size; 110 ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr); 111 break; 112 case CEED_EVAL_WEIGHT: // Only on input fields 113 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 114 q_size = (CeedSize)Q*blk_size; 115 ierr = CeedVectorCreate(ceed, q_size, &q_vecs[i]); CeedChkBackend(ierr); 116 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 117 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 118 CeedChkBackend(ierr); 119 120 break; 121 case CEED_EVAL_DIV: 122 break; // Not implemented 123 case CEED_EVAL_CURL: 124 break; // Not implemented 125 } 126 } 127 return CEED_ERROR_SUCCESS; 128 } 129 130 //------------------------------------------------------------------------------ 131 // Setup Operator 132 //------------------------------------------------------------------------------ 133 static int CeedOperatorSetup_Blocked(CeedOperator op) { 134 int ierr; 135 bool is_setup_done; 136 ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr); 137 if (is_setup_done) return CEED_ERROR_SUCCESS; 138 Ceed ceed; 139 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 140 CeedOperator_Blocked *impl; 141 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 142 CeedQFunction qf; 143 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 144 CeedInt Q, num_input_fields, num_output_fields; 145 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 146 ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr); 147 CeedOperatorField *op_input_fields, *op_output_fields; 148 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 149 &num_output_fields, &op_output_fields); 150 CeedChkBackend(ierr); 151 CeedQFunctionField *qf_input_fields, *qf_output_fields; 152 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 153 &qf_output_fields); 154 CeedChkBackend(ierr); 155 156 // Allocate 157 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 158 CeedChkBackend(ierr); 159 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full); 160 CeedChkBackend(ierr); 161 162 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr); 163 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr); 164 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr); 165 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr); 166 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr); 167 168 impl->num_inputs = num_input_fields; 169 impl->num_outputs = num_output_fields; 170 171 // Set up infield and outfield pointer arrays 172 // Infields 173 ierr = CeedOperatorSetupFields_Blocked(qf, op, true, impl->blk_restr, 174 impl->e_vecs_full, impl->e_vecs_in, 175 impl->q_vecs_in, 0, 176 num_input_fields, Q); 177 CeedChkBackend(ierr); 178 // Outfields 179 ierr = CeedOperatorSetupFields_Blocked(qf, op, false, impl->blk_restr, 180 impl->e_vecs_full, impl->e_vecs_out, 181 impl->q_vecs_out, num_input_fields, 182 num_output_fields, Q); 183 CeedChkBackend(ierr); 184 185 // Identity QFunctions 186 if (impl->is_identity_qf) { 187 CeedEvalMode in_mode, out_mode; 188 CeedQFunctionField *in_fields, *out_fields; 189 ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields); 190 CeedChkBackend(ierr); 191 ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode); 192 CeedChkBackend(ierr); 193 ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode); 194 CeedChkBackend(ierr); 195 196 if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) { 197 impl->is_identity_restr_op = true; 198 } else { 199 ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr); 200 impl->q_vecs_out[0] = impl->q_vecs_in[0]; 201 ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr); 202 } 203 } 204 205 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 206 207 return CEED_ERROR_SUCCESS; 208 } 209 210 //------------------------------------------------------------------------------ 211 // Setup Operator Inputs 212 //------------------------------------------------------------------------------ 213 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields, 214 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 215 CeedVector in_vec, bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX], 216 CeedOperator_Blocked *impl, CeedRequest *request) { 217 CeedInt ierr; 218 CeedEvalMode eval_mode; 219 CeedVector vec; 220 uint64_t state; 221 222 for (CeedInt i=0; i<num_input_fields; i++) { 223 // Get input vector 224 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 225 CeedChkBackend(ierr); 226 if (vec == CEED_VECTOR_ACTIVE) { 227 if (skip_active) 228 continue; 229 else 230 vec = in_vec; 231 } 232 233 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 234 CeedChkBackend(ierr); 235 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 236 } else { 237 // Restrict 238 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 239 if (state != impl->input_states[i] || vec == in_vec) { 240 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 241 vec, impl->e_vecs_full[i], request); 242 CeedChkBackend(ierr); 243 impl->input_states[i] = state; 244 } 245 // Get evec 246 ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, 247 (const CeedScalar **) &e_data_full[i]); 248 CeedChkBackend(ierr); 249 } 250 } 251 return CEED_ERROR_SUCCESS; 252 } 253 254 //------------------------------------------------------------------------------ 255 // Input Basis Action 256 //------------------------------------------------------------------------------ 257 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 258 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 259 CeedInt num_input_fields, CeedInt blk_size, bool skip_active, 260 CeedScalar *e_data_full[2*CEED_FIELD_MAX], CeedOperator_Blocked *impl) { 261 CeedInt ierr; 262 CeedInt dim, elem_size, size; 263 CeedElemRestriction elem_restr; 264 CeedEvalMode eval_mode; 265 CeedBasis basis; 266 267 for (CeedInt i=0; i<num_input_fields; i++) { 268 // Skip active input 269 if (skip_active) { 270 CeedVector vec; 271 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 272 CeedChkBackend(ierr); 273 if (vec == CEED_VECTOR_ACTIVE) 274 continue; 275 } 276 277 // Get elem_size, eval_mode, size 278 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 279 CeedChkBackend(ierr); 280 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 281 CeedChkBackend(ierr); 282 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 283 CeedChkBackend(ierr); 284 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 285 CeedChkBackend(ierr); 286 // Basis action 287 switch(eval_mode) { 288 case CEED_EVAL_NONE: 289 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 290 CEED_USE_POINTER, &e_data_full[i][e*Q*size]); 291 CeedChkBackend(ierr); 292 break; 293 case CEED_EVAL_INTERP: 294 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 295 CeedChkBackend(ierr); 296 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 297 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size]); 298 CeedChkBackend(ierr); 299 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 300 CEED_EVAL_INTERP, impl->e_vecs_in[i], 301 impl->q_vecs_in[i]); CeedChkBackend(ierr); 302 break; 303 case CEED_EVAL_GRAD: 304 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 305 CeedChkBackend(ierr); 306 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 307 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 308 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size/dim]); 309 CeedChkBackend(ierr); 310 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 311 CEED_EVAL_GRAD, impl->e_vecs_in[i], 312 impl->q_vecs_in[i]); CeedChkBackend(ierr); 313 break; 314 case CEED_EVAL_WEIGHT: 315 break; // No action 316 // LCOV_EXCL_START 317 case CEED_EVAL_DIV: 318 case CEED_EVAL_CURL: { 319 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 320 CeedChkBackend(ierr); 321 Ceed ceed; 322 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 323 return CeedError(ceed, CEED_ERROR_BACKEND, 324 "Ceed evaluation mode not implemented"); 325 // LCOV_EXCL_STOP 326 } 327 } 328 } 329 return CEED_ERROR_SUCCESS; 330 } 331 332 //------------------------------------------------------------------------------ 333 // Output Basis Action 334 //------------------------------------------------------------------------------ 335 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 336 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 337 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 338 CeedOperator op, CeedScalar *e_data_full[2*CEED_FIELD_MAX], 339 CeedOperator_Blocked *impl) { 340 CeedInt ierr; 341 CeedInt dim, elem_size, size; 342 CeedElemRestriction elem_restr; 343 CeedEvalMode eval_mode; 344 CeedBasis basis; 345 346 for (CeedInt i=0; i<num_output_fields; i++) { 347 // Get elem_size, eval_mode, size 348 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 349 CeedChkBackend(ierr); 350 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 351 CeedChkBackend(ierr); 352 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 353 CeedChkBackend(ierr); 354 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 355 CeedChkBackend(ierr); 356 // Basis action 357 switch(eval_mode) { 358 case CEED_EVAL_NONE: 359 break; // No action 360 case CEED_EVAL_INTERP: 361 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 362 CeedChkBackend(ierr); 363 ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 364 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size]); 365 CeedChkBackend(ierr); 366 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 367 CEED_EVAL_INTERP, impl->q_vecs_out[i], 368 impl->e_vecs_out[i]); CeedChkBackend(ierr); 369 break; 370 case CEED_EVAL_GRAD: 371 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 372 CeedChkBackend(ierr); 373 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 374 ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 375 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size/dim]); 376 CeedChkBackend(ierr); 377 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 378 CEED_EVAL_GRAD, impl->q_vecs_out[i], 379 impl->e_vecs_out[i]); CeedChkBackend(ierr); 380 break; 381 // LCOV_EXCL_START 382 case CEED_EVAL_WEIGHT: { 383 Ceed ceed; 384 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 385 return CeedError(ceed, CEED_ERROR_BACKEND, 386 "CEED_EVAL_WEIGHT cannot be an output " 387 "evaluation mode"); 388 } 389 case CEED_EVAL_DIV: 390 case CEED_EVAL_CURL: { 391 Ceed ceed; 392 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 393 return CeedError(ceed, CEED_ERROR_BACKEND, 394 "Ceed evaluation mode not implemented"); 395 // LCOV_EXCL_STOP 396 } 397 } 398 } 399 return CEED_ERROR_SUCCESS; 400 } 401 402 //------------------------------------------------------------------------------ 403 // Restore Input Vectors 404 //------------------------------------------------------------------------------ 405 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields, 406 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 407 bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX], 408 CeedOperator_Blocked *impl) { 409 CeedInt ierr; 410 CeedEvalMode eval_mode; 411 412 for (CeedInt i=0; i<num_input_fields; i++) { 413 // Skip active inputs 414 if (skip_active) { 415 CeedVector vec; 416 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 417 CeedChkBackend(ierr); 418 if (vec == CEED_VECTOR_ACTIVE) 419 continue; 420 } 421 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 422 CeedChkBackend(ierr); 423 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 424 } else { 425 ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i], 426 (const CeedScalar **) &e_data_full[i]); 427 CeedChkBackend(ierr); 428 } 429 } 430 return CEED_ERROR_SUCCESS; 431 } 432 433 //------------------------------------------------------------------------------ 434 // Operator Apply 435 //------------------------------------------------------------------------------ 436 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec, 437 CeedVector out_vec, 438 CeedRequest *request) { 439 int ierr; 440 CeedOperator_Blocked *impl; 441 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 442 const CeedInt blk_size = 8; 443 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 444 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 445 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 446 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 447 CeedQFunction qf; 448 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 449 CeedOperatorField *op_input_fields, *op_output_fields; 450 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 451 &num_output_fields, &op_output_fields); 452 CeedChkBackend(ierr); 453 CeedQFunctionField *qf_input_fields, *qf_output_fields; 454 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 455 &qf_output_fields); 456 CeedChkBackend(ierr); 457 CeedEvalMode eval_mode; 458 CeedVector vec; 459 CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0}; 460 461 // Setup 462 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 463 464 // Restriction only operator 465 if (impl->is_identity_restr_op) { 466 ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec, 467 impl->e_vecs_full[0], request); CeedChkBackend(ierr); 468 ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE, 469 impl->e_vecs_full[0], out_vec, request); CeedChkBackend(ierr); 470 return CEED_ERROR_SUCCESS; 471 } 472 473 // Input Evecs and Restriction 474 ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 475 op_input_fields, in_vec, false, e_data_full, 476 impl, request); CeedChkBackend(ierr); 477 478 // Output Evecs 479 for (CeedInt i=0; i<num_output_fields; i++) { 480 ierr = CeedVectorGetArrayWrite(impl->e_vecs_full[i+impl->num_inputs], 481 CEED_MEM_HOST, &e_data_full[i + num_input_fields]); 482 CeedChkBackend(ierr); 483 } 484 485 // Loop through elements 486 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 487 // Output pointers 488 for (CeedInt i=0; i<num_output_fields; i++) { 489 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 490 CeedChkBackend(ierr); 491 if (eval_mode == CEED_EVAL_NONE) { 492 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 493 CeedChkBackend(ierr); 494 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 495 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*Q*size]); 496 CeedChkBackend(ierr); 497 } 498 } 499 500 // Input basis apply 501 ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 502 num_input_fields, blk_size, false, e_data_full, 503 impl); CeedChkBackend(ierr); 504 505 // Q function 506 if (!impl->is_identity_qf) { 507 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 508 CeedChkBackend(ierr); 509 } 510 511 // Output basis apply 512 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields, 513 blk_size, num_input_fields, 514 num_output_fields, op, e_data_full, impl); 515 CeedChkBackend(ierr); 516 } 517 518 // Output restriction 519 for (CeedInt i=0; i<num_output_fields; i++) { 520 // Restore evec 521 ierr = CeedVectorRestoreArray(impl->e_vecs_full[i+impl->num_inputs], 522 &e_data_full[i + num_input_fields]); 523 CeedChkBackend(ierr); 524 // Get output vector 525 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 526 CeedChkBackend(ierr); 527 // Active 528 if (vec == CEED_VECTOR_ACTIVE) 529 vec = out_vec; 530 // Restrict 531 ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_inputs], 532 CEED_TRANSPOSE, impl->e_vecs_full[i+impl->num_inputs], 533 vec, request); CeedChkBackend(ierr); 534 535 } 536 537 // Restore input arrays 538 ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 539 op_input_fields, false, e_data_full, impl); CeedChkBackend(ierr); 540 541 return CEED_ERROR_SUCCESS; 542 } 543 544 //------------------------------------------------------------------------------ 545 // Core code for assembling linear QFunction 546 //------------------------------------------------------------------------------ 547 static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked( 548 CeedOperator op, bool build_objects, CeedVector *assembled, 549 CeedElemRestriction *rstr, CeedRequest *request) { 550 int ierr; 551 CeedOperator_Blocked *impl; 552 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 553 const CeedInt blk_size = 8; 554 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 555 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 556 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 557 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 558 CeedQFunction qf; 559 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 560 CeedOperatorField *op_input_fields, *op_output_fields; 561 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 562 &num_output_fields, &op_output_fields); 563 CeedChkBackend(ierr); 564 CeedQFunctionField *qf_input_fields, *qf_output_fields; 565 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 566 &qf_output_fields); 567 CeedChkBackend(ierr); 568 CeedVector vec, l_vec = impl->qf_l_vec; 569 CeedInt num_active_in = impl->num_active_in, 570 num_active_out = impl->num_active_out; 571 CeedSize q_size; 572 CeedVector *active_in = impl->qf_active_in; 573 CeedScalar *a, *tmp; 574 Ceed ceed; 575 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 576 CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0}; 577 578 // Setup 579 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 580 581 // Check for identity 582 if (impl->is_identity_qf) 583 // LCOV_EXCL_START 584 return CeedError(ceed, CEED_ERROR_BACKEND, 585 "Assembling identity QFunctions not supported"); 586 // LCOV_EXCL_STOP 587 588 // Input Evecs and Restriction 589 ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 590 op_input_fields, NULL, true, e_data_full, 591 impl, request); CeedChkBackend(ierr); 592 593 // Count number of active input fields 594 if (!num_active_in) { 595 for (CeedInt i=0; i<num_input_fields; i++) { 596 // Get input vector 597 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 598 CeedChkBackend(ierr); 599 // Check if active input 600 if (vec == CEED_VECTOR_ACTIVE) { 601 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 602 CeedChkBackend(ierr); 603 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 604 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 605 CeedChkBackend(ierr); 606 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 607 for (CeedInt field=0; field<size; field++) { 608 q_size = (CeedSize)Q*blk_size; 609 ierr = CeedVectorCreate(ceed, q_size, &active_in[num_active_in+field]); 610 CeedChkBackend(ierr); 611 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 612 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 613 CeedChkBackend(ierr); 614 } 615 num_active_in += size; 616 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 617 } 618 } 619 impl->num_active_in = num_active_in; 620 impl->qf_active_in = active_in; 621 } 622 623 // Count number of active output fields 624 if (!num_active_out) { 625 for (CeedInt i=0; i<num_output_fields; i++) { 626 // Get output vector 627 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 628 CeedChkBackend(ierr); 629 // Check if active output 630 if (vec == CEED_VECTOR_ACTIVE) { 631 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 632 CeedChkBackend(ierr); 633 num_active_out += size; 634 } 635 } 636 impl->num_active_out = num_active_out; 637 } 638 639 // Check sizes 640 if (!num_active_in || !num_active_out) 641 // LCOV_EXCL_START 642 return CeedError(ceed, CEED_ERROR_BACKEND, 643 "Cannot assemble QFunction without active inputs " 644 "and outputs"); 645 // LCOV_EXCL_STOP 646 647 // Setup Lvec 648 if (!l_vec) { 649 CeedSize l_size = (CeedSize)num_blks*blk_size*Q*num_active_in*num_active_out; 650 ierr = CeedVectorCreate(ceed, l_size, &l_vec); CeedChkBackend(ierr); 651 impl->qf_l_vec = l_vec; 652 } 653 ierr = CeedVectorGetArrayWrite(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 654 655 // Build objects if needed 656 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 657 if (build_objects) { 658 // Create output restriction 659 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 660 num_active_in*num_active_out, 661 num_active_in*num_active_out*num_elem*Q, 662 strides, rstr); CeedChkBackend(ierr); 663 // Create assembled vector 664 CeedSize l_size = (CeedSize)num_elem*Q*num_active_in*num_active_out; 665 ierr = CeedVectorCreate(ceed, l_size, assembled); CeedChkBackend(ierr); 666 } 667 668 // Loop through elements 669 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 670 // Input basis apply 671 ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 672 num_input_fields, blk_size, true, e_data_full, 673 impl); CeedChkBackend(ierr); 674 675 // Assemble QFunction 676 for (CeedInt in=0; in<num_active_in; in++) { 677 // Set Inputs 678 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 679 if (num_active_in > 1) { 680 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 681 0.0); CeedChkBackend(ierr); 682 } 683 // Set Outputs 684 for (CeedInt out=0; out<num_output_fields; out++) { 685 // Get output vector 686 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 687 CeedChkBackend(ierr); 688 // Check if active output 689 if (vec == CEED_VECTOR_ACTIVE) { 690 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 691 CEED_USE_POINTER, a); CeedChkBackend(ierr); 692 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 693 CeedChkBackend(ierr); 694 a += size*Q*blk_size; // Advance the pointer by the size of the output 695 } 696 } 697 // Apply QFunction 698 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 699 CeedChkBackend(ierr); 700 } 701 } 702 703 // Un-set output Qvecs to prevent accidental overwrite of Assembled 704 for (CeedInt out=0; out<num_output_fields; out++) { 705 // Get output vector 706 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 707 CeedChkBackend(ierr); 708 // Check if active output 709 if (vec == CEED_VECTOR_ACTIVE) { 710 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 711 NULL); CeedChkBackend(ierr); 712 } 713 } 714 715 // Restore input arrays 716 ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 717 op_input_fields, true, e_data_full, impl); CeedChkBackend(ierr); 718 719 // Output blocked restriction 720 ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr); 721 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 722 CeedElemRestriction blk_rstr = impl->qf_blk_rstr; 723 if (!impl->qf_blk_rstr) { 724 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 725 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 726 strides, &blk_rstr); CeedChkBackend(ierr); 727 impl->qf_blk_rstr = blk_rstr; 728 } 729 ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled, 730 request); CeedChkBackend(ierr); 731 732 return CEED_ERROR_SUCCESS; 733 } 734 735 //------------------------------------------------------------------------------ 736 // Assemble Linear QFunction 737 //------------------------------------------------------------------------------ 738 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op, 739 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 740 return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled, 741 rstr, request); 742 } 743 744 //------------------------------------------------------------------------------ 745 // Update Assembled Linear QFunction 746 //------------------------------------------------------------------------------ 747 static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op, 748 CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 749 return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled, 750 &rstr, request); 751 } 752 753 //------------------------------------------------------------------------------ 754 // Operator Destroy 755 //------------------------------------------------------------------------------ 756 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 757 int ierr; 758 CeedOperator_Blocked *impl; 759 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 760 761 for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) { 762 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 763 ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr); 764 } 765 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 766 ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr); 767 ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr); 768 769 for (CeedInt i=0; i<impl->num_inputs; i++) { 770 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 771 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 772 } 773 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 774 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 775 776 for (CeedInt i=0; i<impl->num_outputs; i++) { 777 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 778 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 779 } 780 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 781 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 782 783 // QFunction assembly data 784 for (CeedInt i=0; i<impl->num_active_in; i++) { 785 ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr); 786 } 787 ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr); 788 ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr); 789 ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr); 790 791 ierr = CeedFree(&impl); CeedChkBackend(ierr); 792 return CEED_ERROR_SUCCESS; 793 } 794 795 //------------------------------------------------------------------------------ 796 // Operator Create 797 //------------------------------------------------------------------------------ 798 int CeedOperatorCreate_Blocked(CeedOperator op) { 799 int ierr; 800 Ceed ceed; 801 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 802 CeedOperator_Blocked *impl; 803 804 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 805 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 806 807 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 808 CeedOperatorLinearAssembleQFunction_Blocked); 809 CeedChkBackend(ierr); 810 ierr = CeedSetBackendFunction(ceed, "Operator", op, 811 "LinearAssembleQFunctionUpdate", 812 CeedOperatorLinearAssembleQFunctionUpdate_Blocked); 813 CeedChkBackend(ierr); 814 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 815 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr); 816 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 817 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr); 818 return CEED_ERROR_SUCCESS; 819 } 820 //------------------------------------------------------------------------------ 821