1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stddef.h> 21 #include <stdint.h> 22 #include "ceed-blocked.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 28 CeedOperator op, bool is_input, CeedElemRestriction *blk_restr, 29 CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs, 30 CeedInt start_e, CeedInt num_fields, CeedInt Q) { 31 CeedInt ierr, num_comp, size, P; 32 Ceed ceed; 33 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 34 CeedBasis basis; 35 CeedElemRestriction r; 36 CeedOperatorField *op_fields; 37 CeedQFunctionField *qf_fields; 38 if (is_input) { 39 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); 40 CeedChkBackend(ierr); 41 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); 42 CeedChkBackend(ierr); 43 } else { 44 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); 45 CeedChkBackend(ierr); 46 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 47 CeedChkBackend(ierr); 48 } 49 const CeedInt blk_size = 8; 50 51 // Loop over fields 52 for (CeedInt i=0; i<num_fields; i++) { 53 CeedEvalMode eval_mode; 54 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 55 CeedChkBackend(ierr); 56 57 if (eval_mode != CEED_EVAL_WEIGHT) { 58 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 59 CeedChkBackend(ierr); 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 61 CeedInt num_elem, elem_size, l_size, comp_stride; 62 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 63 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 64 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 65 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 66 67 bool strided; 68 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 69 if (strided) { 70 CeedInt strides[3]; 71 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 72 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 73 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 74 CeedChkBackend(ierr); 75 } else { 76 const CeedInt *offsets = NULL; 77 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 78 CeedChkBackend(ierr); 79 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 80 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 81 blk_size, num_comp, comp_stride, 82 l_size, CEED_MEM_HOST, 83 CEED_COPY_VALUES, offsets, 84 &blk_restr[i+start_e]); 85 CeedChkBackend(ierr); 86 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 87 } 88 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 89 &e_vecs_full[i+start_e]); 90 CeedChkBackend(ierr); 91 } 92 93 switch(eval_mode) { 94 case CEED_EVAL_NONE: 95 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 96 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 97 CeedChkBackend(ierr); 98 break; 99 case CEED_EVAL_INTERP: 100 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 101 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 102 ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr); 103 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr); 104 ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]); 105 CeedChkBackend(ierr); 106 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 107 CeedChkBackend(ierr); 108 break; 109 case CEED_EVAL_GRAD: 110 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 111 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 112 ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr); 113 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr); 114 ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]); 115 CeedChkBackend(ierr); 116 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 117 CeedChkBackend(ierr); 118 break; 119 case CEED_EVAL_WEIGHT: // Only on input fields 120 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 121 ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr); 122 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 123 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 124 CeedChkBackend(ierr); 125 126 break; 127 case CEED_EVAL_DIV: 128 break; // Not implemented 129 case CEED_EVAL_CURL: 130 break; // Not implemented 131 } 132 } 133 return CEED_ERROR_SUCCESS; 134 } 135 136 //------------------------------------------------------------------------------ 137 // Setup Operator 138 //------------------------------------------------------------------------------ 139 static int CeedOperatorSetup_Blocked(CeedOperator op) { 140 int ierr; 141 bool is_setup_done; 142 ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr); 143 if (is_setup_done) return CEED_ERROR_SUCCESS; 144 Ceed ceed; 145 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 146 CeedOperator_Blocked *impl; 147 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 148 CeedQFunction qf; 149 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 150 CeedInt Q, num_input_fields, num_output_fields; 151 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 152 ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr); 153 CeedOperatorField *op_input_fields, *op_output_fields; 154 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 155 &num_output_fields, &op_output_fields); 156 CeedChkBackend(ierr); 157 CeedQFunctionField *qf_input_fields, *qf_output_fields; 158 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 159 &qf_output_fields); 160 CeedChkBackend(ierr); 161 162 // Allocate 163 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 164 CeedChkBackend(ierr); 165 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full); 166 CeedChkBackend(ierr); 167 168 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr); 169 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr); 170 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr); 171 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr); 172 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr); 173 174 impl->num_inputs = num_input_fields; 175 impl->num_outputs = num_output_fields; 176 177 // Set up infield and outfield pointer arrays 178 // Infields 179 ierr = CeedOperatorSetupFields_Blocked(qf, op, true, impl->blk_restr, 180 impl->e_vecs_full, impl->e_vecs_in, 181 impl->q_vecs_in, 0, 182 num_input_fields, Q); 183 CeedChkBackend(ierr); 184 // Outfields 185 ierr = CeedOperatorSetupFields_Blocked(qf, op, false, impl->blk_restr, 186 impl->e_vecs_full, impl->e_vecs_out, 187 impl->q_vecs_out, num_input_fields, 188 num_output_fields, Q); 189 CeedChkBackend(ierr); 190 191 // Identity QFunctions 192 if (impl->is_identity_qf) { 193 CeedEvalMode in_mode, out_mode; 194 CeedQFunctionField *in_fields, *out_fields; 195 ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields); 196 CeedChkBackend(ierr); 197 ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode); 198 CeedChkBackend(ierr); 199 ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode); 200 CeedChkBackend(ierr); 201 202 if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) { 203 impl->is_identity_restr_op = true; 204 } else { 205 ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr); 206 impl->q_vecs_out[0] = impl->q_vecs_in[0]; 207 ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr); 208 } 209 } 210 211 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 212 213 return CEED_ERROR_SUCCESS; 214 } 215 216 //------------------------------------------------------------------------------ 217 // Setup Operator Inputs 218 //------------------------------------------------------------------------------ 219 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields, 220 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 221 CeedVector in_vec, bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX], 222 CeedOperator_Blocked *impl, CeedRequest *request) { 223 CeedInt ierr; 224 CeedEvalMode eval_mode; 225 CeedVector vec; 226 uint64_t state; 227 228 for (CeedInt i=0; i<num_input_fields; i++) { 229 // Get input vector 230 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 231 CeedChkBackend(ierr); 232 if (vec == CEED_VECTOR_ACTIVE) { 233 if (skip_active) 234 continue; 235 else 236 vec = in_vec; 237 } 238 239 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 240 CeedChkBackend(ierr); 241 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 242 } else { 243 // Restrict 244 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 245 if (state != impl->input_states[i] || vec == in_vec) { 246 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 247 vec, impl->e_vecs_full[i], request); 248 CeedChkBackend(ierr); 249 impl->input_states[i] = state; 250 } 251 // Get evec 252 ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, 253 (const CeedScalar **) &e_data_full[i]); 254 CeedChkBackend(ierr); 255 } 256 } 257 return CEED_ERROR_SUCCESS; 258 } 259 260 //------------------------------------------------------------------------------ 261 // Input Basis Action 262 //------------------------------------------------------------------------------ 263 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 264 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 265 CeedInt num_input_fields, CeedInt blk_size, bool skip_active, 266 CeedScalar *e_data_full[2*CEED_FIELD_MAX], CeedOperator_Blocked *impl) { 267 CeedInt ierr; 268 CeedInt dim, elem_size, size; 269 CeedElemRestriction elem_restr; 270 CeedEvalMode eval_mode; 271 CeedBasis basis; 272 273 for (CeedInt i=0; i<num_input_fields; i++) { 274 // Skip active input 275 if (skip_active) { 276 CeedVector vec; 277 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 278 CeedChkBackend(ierr); 279 if (vec == CEED_VECTOR_ACTIVE) 280 continue; 281 } 282 283 // Get elem_size, eval_mode, size 284 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 285 CeedChkBackend(ierr); 286 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 287 CeedChkBackend(ierr); 288 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 289 CeedChkBackend(ierr); 290 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 291 CeedChkBackend(ierr); 292 // Basis action 293 switch(eval_mode) { 294 case CEED_EVAL_NONE: 295 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 296 CEED_USE_POINTER, &e_data_full[i][e*Q*size]); 297 CeedChkBackend(ierr); 298 break; 299 case CEED_EVAL_INTERP: 300 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 301 CeedChkBackend(ierr); 302 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 303 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size]); 304 CeedChkBackend(ierr); 305 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 306 CEED_EVAL_INTERP, impl->e_vecs_in[i], 307 impl->q_vecs_in[i]); CeedChkBackend(ierr); 308 break; 309 case CEED_EVAL_GRAD: 310 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 311 CeedChkBackend(ierr); 312 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 313 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 314 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size/dim]); 315 CeedChkBackend(ierr); 316 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 317 CEED_EVAL_GRAD, impl->e_vecs_in[i], 318 impl->q_vecs_in[i]); CeedChkBackend(ierr); 319 break; 320 case CEED_EVAL_WEIGHT: 321 break; // No action 322 // LCOV_EXCL_START 323 case CEED_EVAL_DIV: 324 case CEED_EVAL_CURL: { 325 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 326 CeedChkBackend(ierr); 327 Ceed ceed; 328 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 329 return CeedError(ceed, CEED_ERROR_BACKEND, 330 "Ceed evaluation mode not implemented"); 331 // LCOV_EXCL_STOP 332 } 333 } 334 } 335 return CEED_ERROR_SUCCESS; 336 } 337 338 //------------------------------------------------------------------------------ 339 // Output Basis Action 340 //------------------------------------------------------------------------------ 341 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 342 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 343 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 344 CeedOperator op, CeedScalar *e_data_full[2*CEED_FIELD_MAX], 345 CeedOperator_Blocked *impl) { 346 CeedInt ierr; 347 CeedInt dim, elem_size, size; 348 CeedElemRestriction elem_restr; 349 CeedEvalMode eval_mode; 350 CeedBasis basis; 351 352 for (CeedInt i=0; i<num_output_fields; i++) { 353 // Get elem_size, eval_mode, size 354 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 355 CeedChkBackend(ierr); 356 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 357 CeedChkBackend(ierr); 358 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 359 CeedChkBackend(ierr); 360 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 361 CeedChkBackend(ierr); 362 // Basis action 363 switch(eval_mode) { 364 case CEED_EVAL_NONE: 365 break; // No action 366 case CEED_EVAL_INTERP: 367 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 368 CeedChkBackend(ierr); 369 ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 370 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size]); 371 CeedChkBackend(ierr); 372 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 373 CEED_EVAL_INTERP, impl->q_vecs_out[i], 374 impl->e_vecs_out[i]); CeedChkBackend(ierr); 375 break; 376 case CEED_EVAL_GRAD: 377 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 378 CeedChkBackend(ierr); 379 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 380 ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 381 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size/dim]); 382 CeedChkBackend(ierr); 383 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 384 CEED_EVAL_GRAD, impl->q_vecs_out[i], 385 impl->e_vecs_out[i]); CeedChkBackend(ierr); 386 break; 387 // LCOV_EXCL_START 388 case CEED_EVAL_WEIGHT: { 389 Ceed ceed; 390 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 391 return CeedError(ceed, CEED_ERROR_BACKEND, 392 "CEED_EVAL_WEIGHT cannot be an output " 393 "evaluation mode"); 394 } 395 case CEED_EVAL_DIV: 396 case CEED_EVAL_CURL: { 397 Ceed ceed; 398 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 399 return CeedError(ceed, CEED_ERROR_BACKEND, 400 "Ceed evaluation mode not implemented"); 401 // LCOV_EXCL_STOP 402 } 403 } 404 } 405 return CEED_ERROR_SUCCESS; 406 } 407 408 //------------------------------------------------------------------------------ 409 // Restore Input Vectors 410 //------------------------------------------------------------------------------ 411 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields, 412 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 413 bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX], 414 CeedOperator_Blocked *impl) { 415 CeedInt ierr; 416 CeedEvalMode eval_mode; 417 418 for (CeedInt i=0; i<num_input_fields; i++) { 419 // Skip active inputs 420 if (skip_active) { 421 CeedVector vec; 422 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 423 CeedChkBackend(ierr); 424 if (vec == CEED_VECTOR_ACTIVE) 425 continue; 426 } 427 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 428 CeedChkBackend(ierr); 429 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 430 } else { 431 ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i], 432 (const CeedScalar **) &e_data_full[i]); 433 CeedChkBackend(ierr); 434 } 435 } 436 return CEED_ERROR_SUCCESS; 437 } 438 439 //------------------------------------------------------------------------------ 440 // Operator Apply 441 //------------------------------------------------------------------------------ 442 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec, 443 CeedVector out_vec, 444 CeedRequest *request) { 445 int ierr; 446 CeedOperator_Blocked *impl; 447 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 448 const CeedInt blk_size = 8; 449 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 450 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 451 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 452 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 453 CeedQFunction qf; 454 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 455 CeedOperatorField *op_input_fields, *op_output_fields; 456 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 457 &num_output_fields, &op_output_fields); 458 CeedChkBackend(ierr); 459 CeedQFunctionField *qf_input_fields, *qf_output_fields; 460 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 461 &qf_output_fields); 462 CeedChkBackend(ierr); 463 CeedEvalMode eval_mode; 464 CeedVector vec; 465 CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0}; 466 467 // Setup 468 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 469 470 // Restriction only operator 471 if (impl->is_identity_restr_op) { 472 ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec, 473 impl->e_vecs_full[0], request); CeedChkBackend(ierr); 474 ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE, 475 impl->e_vecs_full[0], out_vec, request); CeedChkBackend(ierr); 476 return CEED_ERROR_SUCCESS; 477 } 478 479 // Input Evecs and Restriction 480 ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 481 op_input_fields, in_vec, false, e_data_full, 482 impl, request); CeedChkBackend(ierr); 483 484 // Output Evecs 485 for (CeedInt i=0; i<num_output_fields; i++) { 486 ierr = CeedVectorGetArrayWrite(impl->e_vecs_full[i+impl->num_inputs], 487 CEED_MEM_HOST, &e_data_full[i + num_input_fields]); 488 CeedChkBackend(ierr); 489 } 490 491 // Loop through elements 492 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 493 // Output pointers 494 for (CeedInt i=0; i<num_output_fields; i++) { 495 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 496 CeedChkBackend(ierr); 497 if (eval_mode == CEED_EVAL_NONE) { 498 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 499 CeedChkBackend(ierr); 500 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 501 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*Q*size]); 502 CeedChkBackend(ierr); 503 } 504 } 505 506 // Input basis apply 507 ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 508 num_input_fields, blk_size, false, e_data_full, 509 impl); CeedChkBackend(ierr); 510 511 // Q function 512 if (!impl->is_identity_qf) { 513 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 514 CeedChkBackend(ierr); 515 } 516 517 // Output basis apply 518 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields, 519 blk_size, num_input_fields, 520 num_output_fields, op, e_data_full, impl); 521 CeedChkBackend(ierr); 522 } 523 524 // Output restriction 525 for (CeedInt i=0; i<num_output_fields; i++) { 526 // Restore evec 527 ierr = CeedVectorRestoreArray(impl->e_vecs_full[i+impl->num_inputs], 528 &e_data_full[i + num_input_fields]); 529 CeedChkBackend(ierr); 530 // Get output vector 531 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 532 CeedChkBackend(ierr); 533 // Active 534 if (vec == CEED_VECTOR_ACTIVE) 535 vec = out_vec; 536 // Restrict 537 ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_inputs], 538 CEED_TRANSPOSE, impl->e_vecs_full[i+impl->num_inputs], 539 vec, request); CeedChkBackend(ierr); 540 541 } 542 543 // Restore input arrays 544 ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 545 op_input_fields, false, e_data_full, impl); CeedChkBackend(ierr); 546 547 return CEED_ERROR_SUCCESS; 548 } 549 550 //------------------------------------------------------------------------------ 551 // Core code for assembling linear QFunction 552 //------------------------------------------------------------------------------ 553 static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked( 554 CeedOperator op, bool build_objects, CeedVector *assembled, 555 CeedElemRestriction *rstr, CeedRequest *request) { 556 int ierr; 557 CeedOperator_Blocked *impl; 558 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 559 const CeedInt blk_size = 8; 560 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 561 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 562 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 563 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 564 CeedQFunction qf; 565 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 566 CeedOperatorField *op_input_fields, *op_output_fields; 567 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 568 &num_output_fields, &op_output_fields); 569 CeedChkBackend(ierr); 570 CeedQFunctionField *qf_input_fields, *qf_output_fields; 571 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 572 &qf_output_fields); 573 CeedChkBackend(ierr); 574 CeedVector vec, l_vec = impl->qf_l_vec; 575 CeedInt num_active_in = impl->num_active_in, 576 num_active_out = impl->num_active_out; 577 CeedVector *active_in = impl->qf_active_in; 578 CeedScalar *a, *tmp; 579 Ceed ceed; 580 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 581 CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0}; 582 583 // Setup 584 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 585 586 // Check for identity 587 if (impl->is_identity_qf) 588 // LCOV_EXCL_START 589 return CeedError(ceed, CEED_ERROR_BACKEND, 590 "Assembling identity QFunctions not supported"); 591 // LCOV_EXCL_STOP 592 593 // Input Evecs and Restriction 594 ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 595 op_input_fields, NULL, true, e_data_full, 596 impl, request); CeedChkBackend(ierr); 597 598 // Count number of active input fields 599 if (!num_active_in) { 600 for (CeedInt i=0; i<num_input_fields; i++) { 601 // Get input vector 602 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 603 CeedChkBackend(ierr); 604 // Check if active input 605 if (vec == CEED_VECTOR_ACTIVE) { 606 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 607 CeedChkBackend(ierr); 608 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 609 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 610 CeedChkBackend(ierr); 611 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 612 for (CeedInt field=0; field<size; field++) { 613 ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]); 614 CeedChkBackend(ierr); 615 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 616 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 617 CeedChkBackend(ierr); 618 } 619 num_active_in += size; 620 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 621 } 622 } 623 impl->num_active_in = num_active_in; 624 impl->qf_active_in = active_in; 625 } 626 627 // Count number of active output fields 628 if (!num_active_out) { 629 for (CeedInt i=0; i<num_output_fields; i++) { 630 // Get output vector 631 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 632 CeedChkBackend(ierr); 633 // Check if active output 634 if (vec == CEED_VECTOR_ACTIVE) { 635 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 636 CeedChkBackend(ierr); 637 num_active_out += size; 638 } 639 } 640 impl->num_active_out = num_active_out; 641 } 642 643 // Check sizes 644 if (!num_active_in || !num_active_out) 645 // LCOV_EXCL_START 646 return CeedError(ceed, CEED_ERROR_BACKEND, 647 "Cannot assemble QFunction without active inputs " 648 "and outputs"); 649 // LCOV_EXCL_STOP 650 651 // Setup Lvec 652 if (!l_vec) { 653 ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, 654 &l_vec); CeedChkBackend(ierr); 655 impl->qf_l_vec = l_vec; 656 } 657 ierr = CeedVectorGetArrayWrite(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 658 659 // Build objects if needed 660 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 661 if (build_objects) { 662 // Create output restriction 663 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 664 num_active_in*num_active_out, 665 num_active_in*num_active_out*num_elem*Q, 666 strides, rstr); CeedChkBackend(ierr); 667 // Create assembled vector 668 ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out, 669 assembled); CeedChkBackend(ierr); 670 } 671 672 // Loop through elements 673 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 674 // Input basis apply 675 ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 676 num_input_fields, blk_size, true, e_data_full, 677 impl); CeedChkBackend(ierr); 678 679 // Assemble QFunction 680 for (CeedInt in=0; in<num_active_in; in++) { 681 // Set Inputs 682 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 683 if (num_active_in > 1) { 684 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 685 0.0); CeedChkBackend(ierr); 686 } 687 // Set Outputs 688 for (CeedInt out=0; out<num_output_fields; out++) { 689 // Get output vector 690 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 691 CeedChkBackend(ierr); 692 // Check if active output 693 if (vec == CEED_VECTOR_ACTIVE) { 694 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 695 CEED_USE_POINTER, a); CeedChkBackend(ierr); 696 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 697 CeedChkBackend(ierr); 698 a += size*Q*blk_size; // Advance the pointer by the size of the output 699 } 700 } 701 // Apply QFunction 702 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 703 CeedChkBackend(ierr); 704 } 705 } 706 707 // Un-set output Qvecs to prevent accidental overwrite of Assembled 708 for (CeedInt out=0; out<num_output_fields; out++) { 709 // Get output vector 710 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 711 CeedChkBackend(ierr); 712 // Check if active output 713 if (vec == CEED_VECTOR_ACTIVE) { 714 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 715 NULL); CeedChkBackend(ierr); 716 } 717 } 718 719 // Restore input arrays 720 ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 721 op_input_fields, true, e_data_full, impl); CeedChkBackend(ierr); 722 723 // Output blocked restriction 724 ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr); 725 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 726 CeedElemRestriction blk_rstr = impl->qf_blk_rstr; 727 if (!impl->qf_blk_rstr) { 728 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 729 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 730 strides, &blk_rstr); CeedChkBackend(ierr); 731 impl->qf_blk_rstr = blk_rstr; 732 } 733 ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled, 734 request); CeedChkBackend(ierr); 735 736 return CEED_ERROR_SUCCESS; 737 } 738 739 //------------------------------------------------------------------------------ 740 // Assemble Linear QFunction 741 //------------------------------------------------------------------------------ 742 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op, 743 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 744 return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled, 745 rstr, request); 746 } 747 748 //------------------------------------------------------------------------------ 749 // Update Assembled Linear QFunction 750 //------------------------------------------------------------------------------ 751 static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op, 752 CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 753 return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled, 754 &rstr, request); 755 } 756 757 //------------------------------------------------------------------------------ 758 // Operator Destroy 759 //------------------------------------------------------------------------------ 760 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 761 int ierr; 762 CeedOperator_Blocked *impl; 763 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 764 765 for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) { 766 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 767 ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr); 768 } 769 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 770 ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr); 771 ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr); 772 773 for (CeedInt i=0; i<impl->num_inputs; i++) { 774 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 775 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 776 } 777 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 778 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 779 780 for (CeedInt i=0; i<impl->num_outputs; i++) { 781 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 782 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 783 } 784 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 785 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 786 787 // QFunction assembly data 788 for (CeedInt i=0; i<impl->num_active_in; i++) { 789 ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr); 790 } 791 ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr); 792 ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr); 793 ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr); 794 795 ierr = CeedFree(&impl); CeedChkBackend(ierr); 796 return CEED_ERROR_SUCCESS; 797 } 798 799 //------------------------------------------------------------------------------ 800 // Operator Create 801 //------------------------------------------------------------------------------ 802 int CeedOperatorCreate_Blocked(CeedOperator op) { 803 int ierr; 804 Ceed ceed; 805 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 806 CeedOperator_Blocked *impl; 807 808 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 809 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 810 811 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 812 CeedOperatorLinearAssembleQFunction_Blocked); 813 CeedChkBackend(ierr); 814 ierr = CeedSetBackendFunction(ceed, "Operator", op, 815 "LinearAssembleQFunctionUpdate", 816 CeedOperatorLinearAssembleQFunctionUpdate_Blocked); 817 CeedChkBackend(ierr); 818 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 819 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr); 820 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 821 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr); 822 return CEED_ERROR_SUCCESS; 823 } 824 //------------------------------------------------------------------------------ 825