1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stddef.h> 21 #include <stdint.h> 22 #include "ceed-blocked.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 28 CeedOperator op, bool inOrOut, CeedElemRestriction *blk_restr, 29 CeedVector *full_evecs, CeedVector *e_vecs, CeedVector *q_vecs, 30 CeedInt start_e, CeedInt numfields, CeedInt Q) { 31 CeedInt dim, ierr, num_comp, size, P; 32 Ceed ceed; 33 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 34 CeedBasis basis; 35 CeedElemRestriction r; 36 CeedOperatorField *op_fields; 37 CeedQFunctionField *qf_fields; 38 if (inOrOut) { 39 ierr = CeedOperatorGetFields(op, NULL, &op_fields); 40 CeedChkBackend(ierr); 41 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); 42 CeedChkBackend(ierr); 43 } else { 44 ierr = CeedOperatorGetFields(op, &op_fields, NULL); 45 CeedChkBackend(ierr); 46 ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); 47 CeedChkBackend(ierr); 48 } 49 const CeedInt blk_size = 8; 50 51 // Loop over fields 52 for (CeedInt i=0; i<numfields; i++) { 53 CeedEvalMode eval_mode; 54 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 55 CeedChkBackend(ierr); 56 57 if (eval_mode != CEED_EVAL_WEIGHT) { 58 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 59 CeedChkBackend(ierr); 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 61 CeedInt num_elem, elem_size, l_size, comp_stride; 62 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 63 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 64 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 65 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 66 67 bool strided; 68 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 69 if (strided) { 70 CeedInt strides[3]; 71 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 72 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 73 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 74 CeedChkBackend(ierr); 75 } else { 76 const CeedInt *offsets = NULL; 77 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 78 CeedChkBackend(ierr); 79 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 80 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 81 blk_size, num_comp, comp_stride, 82 l_size, CEED_MEM_HOST, 83 CEED_COPY_VALUES, offsets, 84 &blk_restr[i+start_e]); 85 CeedChkBackend(ierr); 86 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 87 } 88 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 89 &full_evecs[i+start_e]); 90 CeedChkBackend(ierr); 91 } 92 93 switch(eval_mode) { 94 case CEED_EVAL_NONE: 95 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 96 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 97 CeedChkBackend(ierr); 98 break; 99 case CEED_EVAL_INTERP: 100 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 101 ierr = CeedElemRestrictionGetElementSize(r, &P); 102 CeedChkBackend(ierr); 103 ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]); 104 CeedChkBackend(ierr); 105 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 106 CeedChkBackend(ierr); 107 break; 108 case CEED_EVAL_GRAD: 109 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 110 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 111 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 112 ierr = CeedElemRestrictionGetElementSize(r, &P); 113 CeedChkBackend(ierr); 114 ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]); 115 CeedChkBackend(ierr); 116 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 117 CeedChkBackend(ierr); 118 break; 119 case CEED_EVAL_WEIGHT: // Only on input fields 120 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 121 ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr); 122 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 123 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 124 CeedChkBackend(ierr); 125 126 break; 127 case CEED_EVAL_DIV: 128 break; // Not implemented 129 case CEED_EVAL_CURL: 130 break; // Not implemented 131 } 132 } 133 return CEED_ERROR_SUCCESS; 134 } 135 136 //------------------------------------------------------------------------------ 137 // Setup Operator 138 //------------------------------------------------------------------------------ 139 static int CeedOperatorSetup_Blocked(CeedOperator op) { 140 int ierr; 141 bool setup_done; 142 ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr); 143 if (setup_done) return CEED_ERROR_SUCCESS; 144 Ceed ceed; 145 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 146 CeedOperator_Blocked *impl; 147 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 148 CeedQFunction qf; 149 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 150 CeedInt Q, num_input_fields, num_output_fields; 151 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 152 ierr = CeedQFunctionIsIdentity(qf, &impl->identity_qf); CeedChkBackend(ierr); 153 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 154 CeedChkBackend(ierr); 155 CeedOperatorField *op_input_fields, *op_output_fields; 156 ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields); 157 CeedChkBackend(ierr); 158 CeedQFunctionField *qf_input_fields, *qf_output_fields; 159 ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields); 160 CeedChkBackend(ierr); 161 162 // Allocate 163 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 164 CeedChkBackend(ierr); 165 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs); 166 CeedChkBackend(ierr); 167 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data); 168 CeedChkBackend(ierr); 169 170 ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr); 171 ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr); 172 ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr); 173 ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr); 174 ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr); 175 176 impl->num_e_vecs_in = num_input_fields; 177 impl->num_e_vecs_out = num_output_fields; 178 179 // Set up infield and outfield pointer arrays 180 // Infields 181 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blk_restr, 182 impl->e_vecs, impl->e_vecs_in, 183 impl->q_vecs_in, 0, 184 num_input_fields, Q); 185 CeedChkBackend(ierr); 186 // Outfields 187 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blk_restr, 188 impl->e_vecs, impl->e_vecs_out, 189 impl->q_vecs_out, num_input_fields, 190 num_output_fields, Q); 191 CeedChkBackend(ierr); 192 193 // Identity QFunctions 194 if (impl->identity_qf) { 195 CeedEvalMode in_mode, out_mode; 196 CeedQFunctionField *in_fields, *out_fields; 197 ierr = CeedQFunctionGetFields(qf, &in_fields, &out_fields); 198 CeedChkBackend(ierr); 199 200 for (CeedInt i=0; i<num_input_fields; i++) { 201 ierr = CeedQFunctionFieldGetEvalMode(in_fields[i], &in_mode); 202 CeedChkBackend(ierr); 203 ierr = CeedQFunctionFieldGetEvalMode(out_fields[i], &out_mode); 204 CeedChkBackend(ierr); 205 206 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 207 impl->q_vecs_out[i] = impl->q_vecs_in[i]; 208 ierr = CeedVectorAddReference(impl->q_vecs_in[i]); CeedChkBackend(ierr); 209 } 210 } 211 212 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 213 214 return CEED_ERROR_SUCCESS; 215 } 216 217 //------------------------------------------------------------------------------ 218 // Setup Operator Inputs 219 //------------------------------------------------------------------------------ 220 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields, 221 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 222 CeedVector in_vec, bool skip_active, CeedOperator_Blocked *impl, 223 CeedRequest *request) { 224 CeedInt ierr; 225 CeedEvalMode eval_mode; 226 CeedVector vec; 227 uint64_t state; 228 229 for (CeedInt i=0; i<num_input_fields; i++) { 230 // Get input vector 231 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 232 CeedChkBackend(ierr); 233 if (vec == CEED_VECTOR_ACTIVE) { 234 if (skip_active) 235 continue; 236 else 237 vec = in_vec; 238 } 239 240 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 241 CeedChkBackend(ierr); 242 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 243 } else { 244 // Restrict 245 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 246 if (state != impl->input_state[i] || vec == in_vec) { 247 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 248 vec, impl->e_vecs[i], request); 249 CeedChkBackend(ierr); 250 impl->input_state[i] = state; 251 } 252 // Get evec 253 ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST, 254 (const CeedScalar **) &impl->e_data[i]); 255 CeedChkBackend(ierr); 256 } 257 } 258 return CEED_ERROR_SUCCESS; 259 } 260 261 //------------------------------------------------------------------------------ 262 // Input Basis Action 263 //------------------------------------------------------------------------------ 264 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 265 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 266 CeedInt num_input_fields, CeedInt blk_size, bool skip_active, 267 CeedOperator_Blocked *impl) { 268 CeedInt ierr; 269 CeedInt dim, elem_size, size; 270 CeedElemRestriction elem_restr; 271 CeedEvalMode eval_mode; 272 CeedBasis basis; 273 274 for (CeedInt i=0; i<num_input_fields; i++) { 275 // Skip active input 276 if (skip_active) { 277 CeedVector vec; 278 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 279 CeedChkBackend(ierr); 280 if (vec == CEED_VECTOR_ACTIVE) 281 continue; 282 } 283 284 // Get elem_size, eval_mode, size 285 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 286 CeedChkBackend(ierr); 287 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 288 CeedChkBackend(ierr); 289 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 290 CeedChkBackend(ierr); 291 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 292 CeedChkBackend(ierr); 293 // Basis action 294 switch(eval_mode) { 295 case CEED_EVAL_NONE: 296 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 297 CEED_USE_POINTER, &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr); 298 break; 299 case CEED_EVAL_INTERP: 300 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 301 CeedChkBackend(ierr); 302 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 303 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size]); 304 CeedChkBackend(ierr); 305 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 306 CEED_EVAL_INTERP, impl->e_vecs_in[i], 307 impl->q_vecs_in[i]); CeedChkBackend(ierr); 308 break; 309 case CEED_EVAL_GRAD: 310 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 311 CeedChkBackend(ierr); 312 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 313 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 314 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size/dim]); 315 CeedChkBackend(ierr); 316 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 317 CEED_EVAL_GRAD, impl->e_vecs_in[i], 318 impl->q_vecs_in[i]); CeedChkBackend(ierr); 319 break; 320 case CEED_EVAL_WEIGHT: 321 break; // No action 322 // LCOV_EXCL_START 323 case CEED_EVAL_DIV: 324 case CEED_EVAL_CURL: { 325 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 326 CeedChkBackend(ierr); 327 Ceed ceed; 328 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 329 return CeedError(ceed, CEED_ERROR_BACKEND, 330 "Ceed evaluation mode not implemented"); 331 // LCOV_EXCL_STOP 332 } 333 } 334 } 335 return CEED_ERROR_SUCCESS; 336 } 337 338 //------------------------------------------------------------------------------ 339 // Output Basis Action 340 //------------------------------------------------------------------------------ 341 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 342 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 343 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 344 CeedOperator op, CeedOperator_Blocked *impl) { 345 CeedInt ierr; 346 CeedInt dim, elem_size, size; 347 CeedElemRestriction elem_restr; 348 CeedEvalMode eval_mode; 349 CeedBasis basis; 350 351 for (CeedInt i=0; i<num_output_fields; i++) { 352 // Get elem_size, eval_mode, size 353 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 354 CeedChkBackend(ierr); 355 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 356 CeedChkBackend(ierr); 357 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 358 CeedChkBackend(ierr); 359 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 360 CeedChkBackend(ierr); 361 // Basis action 362 switch(eval_mode) { 363 case CEED_EVAL_NONE: 364 break; // No action 365 case CEED_EVAL_INTERP: 366 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 367 CeedChkBackend(ierr); 368 ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 369 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size]); 370 CeedChkBackend(ierr); 371 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 372 CEED_EVAL_INTERP, impl->q_vecs_out[i], 373 impl->e_vecs_out[i]); CeedChkBackend(ierr); 374 break; 375 case CEED_EVAL_GRAD: 376 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 377 CeedChkBackend(ierr); 378 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 379 ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 380 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size/dim]); 381 CeedChkBackend(ierr); 382 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 383 CEED_EVAL_GRAD, impl->q_vecs_out[i], 384 impl->e_vecs_out[i]); CeedChkBackend(ierr); 385 break; 386 // LCOV_EXCL_START 387 case CEED_EVAL_WEIGHT: { 388 Ceed ceed; 389 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 390 return CeedError(ceed, CEED_ERROR_BACKEND, 391 "CEED_EVAL_WEIGHT cannot be an output " 392 "evaluation mode"); 393 } 394 case CEED_EVAL_DIV: 395 case CEED_EVAL_CURL: { 396 Ceed ceed; 397 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 398 return CeedError(ceed, CEED_ERROR_BACKEND, 399 "Ceed evaluation mode not implemented"); 400 // LCOV_EXCL_STOP 401 } 402 } 403 } 404 return CEED_ERROR_SUCCESS; 405 } 406 407 //------------------------------------------------------------------------------ 408 // Restore Input Vectors 409 //------------------------------------------------------------------------------ 410 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields, 411 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 412 bool skip_active, CeedOperator_Blocked *impl) { 413 CeedInt ierr; 414 CeedEvalMode eval_mode; 415 416 for (CeedInt i=0; i<num_input_fields; i++) { 417 // Skip active inputs 418 if (skip_active) { 419 CeedVector vec; 420 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 421 CeedChkBackend(ierr); 422 if (vec == CEED_VECTOR_ACTIVE) 423 continue; 424 } 425 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 426 CeedChkBackend(ierr); 427 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 428 } else { 429 ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i], 430 (const CeedScalar **) &impl->e_data[i]); 431 CeedChkBackend(ierr); 432 } 433 } 434 return CEED_ERROR_SUCCESS; 435 } 436 437 //------------------------------------------------------------------------------ 438 // Operator Apply 439 //------------------------------------------------------------------------------ 440 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec, 441 CeedVector out_vec, 442 CeedRequest *request) { 443 int ierr; 444 CeedOperator_Blocked *impl; 445 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 446 const CeedInt blk_size = 8; 447 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 448 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 449 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 450 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 451 CeedQFunction qf; 452 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 453 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 454 CeedChkBackend(ierr); 455 CeedOperatorField *op_input_fields, *op_output_fields; 456 ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields); 457 CeedChkBackend(ierr); 458 CeedQFunctionField *qf_input_fields, *qf_output_fields; 459 ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields); 460 CeedChkBackend(ierr); 461 CeedEvalMode eval_mode; 462 CeedVector vec; 463 464 // Setup 465 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 466 467 // Input Evecs and Restriction 468 ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 469 op_input_fields, in_vec, false, impl, 470 request); CeedChkBackend(ierr); 471 472 // Output Evecs 473 for (CeedInt i=0; i<num_output_fields; i++) { 474 ierr = CeedVectorGetArray(impl->e_vecs[i+impl->num_e_vecs_in], CEED_MEM_HOST, 475 &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr); 476 } 477 478 // Loop through elements 479 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 480 // Output pointers 481 for (CeedInt i=0; i<num_output_fields; i++) { 482 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 483 CeedChkBackend(ierr); 484 if (eval_mode == CEED_EVAL_NONE) { 485 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 486 CeedChkBackend(ierr); 487 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 488 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*Q*size]); 489 CeedChkBackend(ierr); 490 } 491 } 492 493 // Input basis apply 494 ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 495 num_input_fields, blk_size, false, impl); 496 CeedChkBackend(ierr); 497 498 // Q function 499 if (!impl->identity_qf) { 500 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 501 CeedChkBackend(ierr); 502 } 503 504 // Output basis apply 505 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields, 506 blk_size, num_input_fields, 507 num_output_fields, op, impl); 508 CeedChkBackend(ierr); 509 } 510 511 // Output restriction 512 for (CeedInt i=0; i<num_output_fields; i++) { 513 // Restore evec 514 ierr = CeedVectorRestoreArray(impl->e_vecs[i+impl->num_e_vecs_in], 515 &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr); 516 // Get output vector 517 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 518 CeedChkBackend(ierr); 519 // Active 520 if (vec == CEED_VECTOR_ACTIVE) 521 vec = out_vec; 522 // Restrict 523 ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_e_vecs_in], 524 CEED_TRANSPOSE, impl->e_vecs[i+impl->num_e_vecs_in], 525 vec, request); CeedChkBackend(ierr); 526 527 } 528 529 // Restore input arrays 530 ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 531 op_input_fields, false, impl); CeedChkBackend(ierr); 532 533 return CEED_ERROR_SUCCESS; 534 } 535 536 //------------------------------------------------------------------------------ 537 // Assemble Linear QFunction 538 //------------------------------------------------------------------------------ 539 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op, 540 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 541 int ierr; 542 CeedOperator_Blocked *impl; 543 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 544 const CeedInt blk_size = 8; 545 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 546 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 547 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 548 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 549 CeedQFunction qf; 550 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 551 ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields); 552 CeedChkBackend(ierr); 553 CeedOperatorField *op_input_fields, *op_output_fields; 554 ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields); 555 CeedChkBackend(ierr); 556 CeedQFunctionField *qf_input_fields, *qf_output_fields; 557 ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields); 558 CeedChkBackend(ierr); 559 CeedVector vec, lvec; 560 CeedInt num_active_in = 0, num_active_out = 0; 561 CeedVector *active_in = NULL; 562 CeedScalar *a, *tmp; 563 Ceed ceed; 564 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 565 566 // Setup 567 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 568 569 // Check for identity 570 if (impl->identity_qf) 571 // LCOV_EXCL_START 572 return CeedError(ceed, CEED_ERROR_BACKEND, 573 "Assembling identity QFunctions not supported"); 574 // LCOV_EXCL_STOP 575 576 // Input Evecs and Restriction 577 ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 578 op_input_fields, NULL, true, impl, 579 request); CeedChkBackend(ierr); 580 581 // Count number of active input fields 582 for (CeedInt i=0; i<num_input_fields; i++) { 583 // Get input vector 584 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 585 CeedChkBackend(ierr); 586 // Check if active input 587 if (vec == CEED_VECTOR_ACTIVE) { 588 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 589 CeedChkBackend(ierr); 590 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 591 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 592 CeedChkBackend(ierr); 593 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 594 for (CeedInt field=0; field<size; field++) { 595 ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]); 596 CeedChkBackend(ierr); 597 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 598 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 599 CeedChkBackend(ierr); 600 } 601 num_active_in += size; 602 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 603 } 604 } 605 606 // Count number of active output fields 607 for (CeedInt i=0; i<num_output_fields; i++) { 608 // Get output vector 609 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 610 CeedChkBackend(ierr); 611 // Check if active output 612 if (vec == CEED_VECTOR_ACTIVE) { 613 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 614 CeedChkBackend(ierr); 615 num_active_out += size; 616 } 617 } 618 619 // Check sizes 620 if (!num_active_in || !num_active_out) 621 // LCOV_EXCL_START 622 return CeedError(ceed, CEED_ERROR_BACKEND, 623 "Cannot assemble QFunction without active inputs " 624 "and outputs"); 625 // LCOV_EXCL_STOP 626 627 // Setup Lvec 628 ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, 629 &lvec); CeedChkBackend(ierr); 630 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 631 632 // Create output restriction 633 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 634 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 635 num_active_in*num_active_out, 636 num_active_in*num_active_out*num_elem*Q, 637 strides, rstr); CeedChkBackend(ierr); 638 // Create assembled vector 639 ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out, 640 assembled); CeedChkBackend(ierr); 641 642 // Loop through elements 643 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 644 // Input basis apply 645 ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 646 num_input_fields, blk_size, true, impl); 647 CeedChkBackend(ierr); 648 649 // Assemble QFunction 650 for (CeedInt in=0; in<num_active_in; in++) { 651 // Set Inputs 652 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 653 if (num_active_in > 1) { 654 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 655 0.0); CeedChkBackend(ierr); 656 } 657 // Set Outputs 658 for (CeedInt out=0; out<num_output_fields; out++) { 659 // Get output vector 660 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 661 CeedChkBackend(ierr); 662 // Check if active output 663 if (vec == CEED_VECTOR_ACTIVE) { 664 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 665 CEED_USE_POINTER, a); CeedChkBackend(ierr); 666 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 667 CeedChkBackend(ierr); 668 a += size*Q*blk_size; // Advance the pointer by the size of the output 669 } 670 } 671 // Apply QFunction 672 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 673 CeedChkBackend(ierr); 674 } 675 } 676 677 // Un-set output Qvecs to prevent accidental overwrite of Assembled 678 for (CeedInt out=0; out<num_output_fields; out++) { 679 // Get output vector 680 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 681 CeedChkBackend(ierr); 682 // Check if active output 683 if (vec == CEED_VECTOR_ACTIVE) { 684 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 685 NULL); CeedChkBackend(ierr); 686 } 687 } 688 689 // Restore input arrays 690 ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 691 op_input_fields, true, impl); CeedChkBackend(ierr); 692 693 // Output blocked restriction 694 ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr); 695 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 696 CeedElemRestriction blk_rstr; 697 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 698 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 699 strides, &blk_rstr); CeedChkBackend(ierr); 700 ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled, 701 request); CeedChkBackend(ierr); 702 703 // Cleanup 704 for (CeedInt i=0; i<num_active_in; i++) { 705 ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr); 706 } 707 ierr = CeedFree(&active_in); CeedChkBackend(ierr); 708 ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr); 709 ierr = CeedElemRestrictionDestroy(&blk_rstr); CeedChkBackend(ierr); 710 711 return CEED_ERROR_SUCCESS; 712 } 713 714 //------------------------------------------------------------------------------ 715 // Operator Destroy 716 //------------------------------------------------------------------------------ 717 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 718 int ierr; 719 CeedOperator_Blocked *impl; 720 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 721 722 for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) { 723 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 724 ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr); 725 } 726 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 727 ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr); 728 ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr); 729 ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr); 730 731 for (CeedInt i=0; i<impl->num_e_vecs_in; i++) { 732 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 733 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 734 } 735 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 736 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 737 738 for (CeedInt i=0; i<impl->num_e_vecs_out; i++) { 739 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 740 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 741 } 742 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 743 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 744 745 ierr = CeedFree(&impl); CeedChkBackend(ierr); 746 return CEED_ERROR_SUCCESS; 747 } 748 749 //------------------------------------------------------------------------------ 750 // Operator Create 751 //------------------------------------------------------------------------------ 752 int CeedOperatorCreate_Blocked(CeedOperator op) { 753 int ierr; 754 Ceed ceed; 755 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 756 CeedOperator_Blocked *impl; 757 758 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 759 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 760 761 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 762 CeedOperatorLinearAssembleQFunction_Blocked); 763 CeedChkBackend(ierr); 764 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 765 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr); 766 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 767 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr); 768 return CEED_ERROR_SUCCESS; 769 } 770 //------------------------------------------------------------------------------ 771