1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include <stdbool.h> 20 #include <stddef.h> 21 #include <stdint.h> 22 #include "ceed-blocked.h" 23 24 //------------------------------------------------------------------------------ 25 // Setup Input/Output Fields 26 //------------------------------------------------------------------------------ 27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 28 CeedOperator op, bool inOrOut, CeedElemRestriction *blk_restr, 29 CeedVector *full_evecs, CeedVector *e_vecs, CeedVector *q_vecs, 30 CeedInt start_e, CeedInt numfields, CeedInt Q) { 31 CeedInt dim, ierr, num_comp, size, P; 32 Ceed ceed; 33 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 34 CeedBasis basis; 35 CeedElemRestriction r; 36 CeedOperatorField *op_fields; 37 CeedQFunctionField *qf_fields; 38 if (inOrOut) { 39 ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); 40 CeedChkBackend(ierr); 41 ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 42 CeedChkBackend(ierr); 43 } else { 44 ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); 45 CeedChkBackend(ierr); 46 ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); 47 CeedChkBackend(ierr); 48 } 49 const CeedInt blk_size = 8; 50 51 // Loop over fields 52 for (CeedInt i=0; i<numfields; i++) { 53 CeedEvalMode eval_mode; 54 ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 55 CeedChkBackend(ierr); 56 57 if (eval_mode != CEED_EVAL_WEIGHT) { 58 ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 59 CeedChkBackend(ierr); 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 61 CeedInt num_elem, elem_size, l_size, comp_stride; 62 ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 63 ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 64 ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 65 ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 66 67 bool strided; 68 ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 69 if (strided) { 70 CeedInt strides[3]; 71 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 72 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 73 blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 74 CeedChkBackend(ierr); 75 } else { 76 const CeedInt *offsets = NULL; 77 ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 78 CeedChkBackend(ierr); 79 ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 80 ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 81 blk_size, num_comp, comp_stride, 82 l_size, CEED_MEM_HOST, 83 CEED_COPY_VALUES, offsets, 84 &blk_restr[i+start_e]); 85 CeedChkBackend(ierr); 86 ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 87 } 88 ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 89 &full_evecs[i+start_e]); 90 CeedChkBackend(ierr); 91 } 92 93 switch(eval_mode) { 94 case CEED_EVAL_NONE: 95 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 96 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 97 CeedChkBackend(ierr); 98 break; 99 case CEED_EVAL_INTERP: 100 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 101 ierr = CeedElemRestrictionGetElementSize(r, &P); 102 CeedChkBackend(ierr); 103 ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]); 104 CeedChkBackend(ierr); 105 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 106 CeedChkBackend(ierr); 107 break; 108 case CEED_EVAL_GRAD: 109 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 110 ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 111 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 112 ierr = CeedElemRestrictionGetElementSize(r, &P); 113 CeedChkBackend(ierr); 114 ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]); 115 CeedChkBackend(ierr); 116 ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 117 CeedChkBackend(ierr); 118 break; 119 case CEED_EVAL_WEIGHT: // Only on input fields 120 ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 121 ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr); 122 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 123 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 124 CeedChkBackend(ierr); 125 126 break; 127 case CEED_EVAL_DIV: 128 break; // Not implemented 129 case CEED_EVAL_CURL: 130 break; // Not implemented 131 } 132 } 133 return CEED_ERROR_SUCCESS; 134 } 135 136 //------------------------------------------------------------------------------ 137 // Setup Operator 138 //------------------------------------------------------------------------------ 139 static int CeedOperatorSetup_Blocked(CeedOperator op) { 140 int ierr; 141 bool setup_done; 142 ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr); 143 if (setup_done) return CEED_ERROR_SUCCESS; 144 Ceed ceed; 145 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 146 CeedOperator_Blocked *impl; 147 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 148 CeedQFunction qf; 149 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 150 CeedInt Q, num_input_fields, num_output_fields; 151 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 152 ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr); 153 CeedOperatorField *op_input_fields, *op_output_fields; 154 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 155 &num_output_fields, &op_output_fields); 156 CeedChkBackend(ierr); 157 CeedQFunctionField *qf_input_fields, *qf_output_fields; 158 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 159 &qf_output_fields); 160 CeedChkBackend(ierr); 161 162 // Allocate 163 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 164 CeedChkBackend(ierr); 165 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs); 166 CeedChkBackend(ierr); 167 ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data); 168 CeedChkBackend(ierr); 169 170 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_state); CeedChkBackend(ierr); 171 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr); 172 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr); 173 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr); 174 ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr); 175 176 impl->num_e_vecs_in = num_input_fields; 177 impl->num_e_vecs_out = num_output_fields; 178 179 // Set up infield and outfield pointer arrays 180 // Infields 181 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blk_restr, 182 impl->e_vecs, impl->e_vecs_in, 183 impl->q_vecs_in, 0, 184 num_input_fields, Q); 185 CeedChkBackend(ierr); 186 // Outfields 187 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blk_restr, 188 impl->e_vecs, impl->e_vecs_out, 189 impl->q_vecs_out, num_input_fields, 190 num_output_fields, Q); 191 CeedChkBackend(ierr); 192 193 // Identity QFunctions 194 if (impl->is_identity_qf) { 195 CeedEvalMode in_mode, out_mode; 196 CeedQFunctionField *in_fields, *out_fields; 197 ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields); 198 CeedChkBackend(ierr); 199 ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode); 200 CeedChkBackend(ierr); 201 ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode); 202 CeedChkBackend(ierr); 203 204 if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) { 205 impl->is_identity_restr_op = true; 206 } else { 207 ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr); 208 impl->q_vecs_out[0] = impl->q_vecs_in[0]; 209 ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr); 210 } 211 } 212 213 ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 214 215 return CEED_ERROR_SUCCESS; 216 } 217 218 //------------------------------------------------------------------------------ 219 // Setup Operator Inputs 220 //------------------------------------------------------------------------------ 221 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields, 222 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 223 CeedVector in_vec, bool skip_active, CeedOperator_Blocked *impl, 224 CeedRequest *request) { 225 CeedInt ierr; 226 CeedEvalMode eval_mode; 227 CeedVector vec; 228 uint64_t state; 229 230 for (CeedInt i=0; i<num_input_fields; i++) { 231 // Get input vector 232 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 233 CeedChkBackend(ierr); 234 if (vec == CEED_VECTOR_ACTIVE) { 235 if (skip_active) 236 continue; 237 else 238 vec = in_vec; 239 } 240 241 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 242 CeedChkBackend(ierr); 243 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 244 } else { 245 // Restrict 246 ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 247 if (state != impl->input_state[i] || vec == in_vec) { 248 ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 249 vec, impl->e_vecs[i], request); 250 CeedChkBackend(ierr); 251 impl->input_state[i] = state; 252 } 253 // Get evec 254 ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST, 255 (const CeedScalar **) &impl->e_data[i]); 256 CeedChkBackend(ierr); 257 } 258 } 259 return CEED_ERROR_SUCCESS; 260 } 261 262 //------------------------------------------------------------------------------ 263 // Input Basis Action 264 //------------------------------------------------------------------------------ 265 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 266 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 267 CeedInt num_input_fields, CeedInt blk_size, bool skip_active, 268 CeedOperator_Blocked *impl) { 269 CeedInt ierr; 270 CeedInt dim, elem_size, size; 271 CeedElemRestriction elem_restr; 272 CeedEvalMode eval_mode; 273 CeedBasis basis; 274 275 for (CeedInt i=0; i<num_input_fields; i++) { 276 // Skip active input 277 if (skip_active) { 278 CeedVector vec; 279 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 280 CeedChkBackend(ierr); 281 if (vec == CEED_VECTOR_ACTIVE) 282 continue; 283 } 284 285 // Get elem_size, eval_mode, size 286 ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 287 CeedChkBackend(ierr); 288 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 289 CeedChkBackend(ierr); 290 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 291 CeedChkBackend(ierr); 292 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 293 CeedChkBackend(ierr); 294 // Basis action 295 switch(eval_mode) { 296 case CEED_EVAL_NONE: 297 ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 298 CEED_USE_POINTER, &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr); 299 break; 300 case CEED_EVAL_INTERP: 301 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 302 CeedChkBackend(ierr); 303 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 304 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size]); 305 CeedChkBackend(ierr); 306 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 307 CEED_EVAL_INTERP, impl->e_vecs_in[i], 308 impl->q_vecs_in[i]); CeedChkBackend(ierr); 309 break; 310 case CEED_EVAL_GRAD: 311 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 312 CeedChkBackend(ierr); 313 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 314 ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 315 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size/dim]); 316 CeedChkBackend(ierr); 317 ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 318 CEED_EVAL_GRAD, impl->e_vecs_in[i], 319 impl->q_vecs_in[i]); CeedChkBackend(ierr); 320 break; 321 case CEED_EVAL_WEIGHT: 322 break; // No action 323 // LCOV_EXCL_START 324 case CEED_EVAL_DIV: 325 case CEED_EVAL_CURL: { 326 ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 327 CeedChkBackend(ierr); 328 Ceed ceed; 329 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 330 return CeedError(ceed, CEED_ERROR_BACKEND, 331 "Ceed evaluation mode not implemented"); 332 // LCOV_EXCL_STOP 333 } 334 } 335 } 336 return CEED_ERROR_SUCCESS; 337 } 338 339 //------------------------------------------------------------------------------ 340 // Output Basis Action 341 //------------------------------------------------------------------------------ 342 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 343 CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 344 CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 345 CeedOperator op, CeedOperator_Blocked *impl) { 346 CeedInt ierr; 347 CeedInt dim, elem_size, size; 348 CeedElemRestriction elem_restr; 349 CeedEvalMode eval_mode; 350 CeedBasis basis; 351 352 for (CeedInt i=0; i<num_output_fields; i++) { 353 // Get elem_size, eval_mode, size 354 ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 355 CeedChkBackend(ierr); 356 ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 357 CeedChkBackend(ierr); 358 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 359 CeedChkBackend(ierr); 360 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 361 CeedChkBackend(ierr); 362 // Basis action 363 switch(eval_mode) { 364 case CEED_EVAL_NONE: 365 break; // No action 366 case CEED_EVAL_INTERP: 367 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 368 CeedChkBackend(ierr); 369 ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 370 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size]); 371 CeedChkBackend(ierr); 372 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 373 CEED_EVAL_INTERP, impl->q_vecs_out[i], 374 impl->e_vecs_out[i]); CeedChkBackend(ierr); 375 break; 376 case CEED_EVAL_GRAD: 377 ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 378 CeedChkBackend(ierr); 379 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 380 ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 381 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size/dim]); 382 CeedChkBackend(ierr); 383 ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 384 CEED_EVAL_GRAD, impl->q_vecs_out[i], 385 impl->e_vecs_out[i]); CeedChkBackend(ierr); 386 break; 387 // LCOV_EXCL_START 388 case CEED_EVAL_WEIGHT: { 389 Ceed ceed; 390 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 391 return CeedError(ceed, CEED_ERROR_BACKEND, 392 "CEED_EVAL_WEIGHT cannot be an output " 393 "evaluation mode"); 394 } 395 case CEED_EVAL_DIV: 396 case CEED_EVAL_CURL: { 397 Ceed ceed; 398 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 399 return CeedError(ceed, CEED_ERROR_BACKEND, 400 "Ceed evaluation mode not implemented"); 401 // LCOV_EXCL_STOP 402 } 403 } 404 } 405 return CEED_ERROR_SUCCESS; 406 } 407 408 //------------------------------------------------------------------------------ 409 // Restore Input Vectors 410 //------------------------------------------------------------------------------ 411 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields, 412 CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 413 bool skip_active, CeedOperator_Blocked *impl) { 414 CeedInt ierr; 415 CeedEvalMode eval_mode; 416 417 for (CeedInt i=0; i<num_input_fields; i++) { 418 // Skip active inputs 419 if (skip_active) { 420 CeedVector vec; 421 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 422 CeedChkBackend(ierr); 423 if (vec == CEED_VECTOR_ACTIVE) 424 continue; 425 } 426 ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 427 CeedChkBackend(ierr); 428 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 429 } else { 430 ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i], 431 (const CeedScalar **) &impl->e_data[i]); 432 CeedChkBackend(ierr); 433 } 434 } 435 return CEED_ERROR_SUCCESS; 436 } 437 438 //------------------------------------------------------------------------------ 439 // Operator Apply 440 //------------------------------------------------------------------------------ 441 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec, 442 CeedVector out_vec, 443 CeedRequest *request) { 444 int ierr; 445 CeedOperator_Blocked *impl; 446 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 447 const CeedInt blk_size = 8; 448 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 449 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 450 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 451 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 452 CeedQFunction qf; 453 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 454 CeedOperatorField *op_input_fields, *op_output_fields; 455 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 456 &num_output_fields, &op_output_fields); 457 CeedChkBackend(ierr); 458 CeedQFunctionField *qf_input_fields, *qf_output_fields; 459 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 460 &qf_output_fields); 461 CeedChkBackend(ierr); 462 CeedEvalMode eval_mode; 463 CeedVector vec; 464 465 // Setup 466 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 467 468 // Restriction only operator 469 if (impl->is_identity_restr_op) { 470 ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec, 471 impl->e_vecs[0], request); CeedChkBackend(ierr); 472 ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE, 473 impl->e_vecs[0], out_vec, request); CeedChkBackend(ierr); 474 return CEED_ERROR_SUCCESS; 475 } 476 477 // Input Evecs and Restriction 478 ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 479 op_input_fields, in_vec, false, impl, 480 request); CeedChkBackend(ierr); 481 482 // Output Evecs 483 for (CeedInt i=0; i<num_output_fields; i++) { 484 ierr = CeedVectorGetArray(impl->e_vecs[i+impl->num_e_vecs_in], CEED_MEM_HOST, 485 &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr); 486 } 487 488 // Loop through elements 489 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 490 // Output pointers 491 for (CeedInt i=0; i<num_output_fields; i++) { 492 ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 493 CeedChkBackend(ierr); 494 if (eval_mode == CEED_EVAL_NONE) { 495 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 496 CeedChkBackend(ierr); 497 ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 498 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*Q*size]); 499 CeedChkBackend(ierr); 500 } 501 } 502 503 // Input basis apply 504 ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 505 num_input_fields, blk_size, false, impl); 506 CeedChkBackend(ierr); 507 508 // Q function 509 if (!impl->is_identity_qf) { 510 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 511 CeedChkBackend(ierr); 512 } 513 514 // Output basis apply 515 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields, 516 blk_size, num_input_fields, 517 num_output_fields, op, impl); 518 CeedChkBackend(ierr); 519 } 520 521 // Output restriction 522 for (CeedInt i=0; i<num_output_fields; i++) { 523 // Restore evec 524 ierr = CeedVectorRestoreArray(impl->e_vecs[i+impl->num_e_vecs_in], 525 &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr); 526 // Get output vector 527 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 528 CeedChkBackend(ierr); 529 // Active 530 if (vec == CEED_VECTOR_ACTIVE) 531 vec = out_vec; 532 // Restrict 533 ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_e_vecs_in], 534 CEED_TRANSPOSE, impl->e_vecs[i+impl->num_e_vecs_in], 535 vec, request); CeedChkBackend(ierr); 536 537 } 538 539 // Restore input arrays 540 ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 541 op_input_fields, false, impl); CeedChkBackend(ierr); 542 543 return CEED_ERROR_SUCCESS; 544 } 545 546 //------------------------------------------------------------------------------ 547 // Core code for assembling linear QFunction 548 //------------------------------------------------------------------------------ 549 static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked( 550 CeedOperator op, 551 bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, 552 CeedRequest *request) { 553 int ierr; 554 CeedOperator_Blocked *impl; 555 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 556 const CeedInt blk_size = 8; 557 CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 558 ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 559 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 560 CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 561 CeedQFunction qf; 562 ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 563 CeedOperatorField *op_input_fields, *op_output_fields; 564 ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 565 &num_output_fields, &op_output_fields); 566 CeedChkBackend(ierr); 567 CeedQFunctionField *qf_input_fields, *qf_output_fields; 568 ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 569 &qf_output_fields); 570 CeedChkBackend(ierr); 571 CeedVector vec, lvec = impl->qf_lvec; 572 CeedInt num_active_in = impl->qf_num_active_in, 573 num_active_out = impl->qf_num_active_out; 574 CeedVector *active_in = impl->qf_active_in; 575 CeedScalar *a, *tmp; 576 Ceed ceed; 577 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 578 579 // Setup 580 ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 581 582 // Check for identity 583 if (impl->is_identity_qf) 584 // LCOV_EXCL_START 585 return CeedError(ceed, CEED_ERROR_BACKEND, 586 "Assembling identity QFunctions not supported"); 587 // LCOV_EXCL_STOP 588 589 // Input Evecs and Restriction 590 ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 591 op_input_fields, NULL, true, impl, 592 request); CeedChkBackend(ierr); 593 594 // Count number of active input fields 595 if (!num_active_in) { 596 for (CeedInt i=0; i<num_input_fields; i++) { 597 // Get input vector 598 ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 599 CeedChkBackend(ierr); 600 // Check if active input 601 if (vec == CEED_VECTOR_ACTIVE) { 602 ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 603 CeedChkBackend(ierr); 604 ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 605 ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 606 CeedChkBackend(ierr); 607 ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 608 for (CeedInt field=0; field<size; field++) { 609 ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]); 610 CeedChkBackend(ierr); 611 ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 612 CEED_USE_POINTER, &tmp[field*Q*blk_size]); 613 CeedChkBackend(ierr); 614 } 615 num_active_in += size; 616 ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 617 } 618 } 619 impl->qf_num_active_in = num_active_in; 620 impl->qf_active_in = active_in; 621 } 622 623 // Count number of active output fields 624 if (!num_active_out) { 625 for (CeedInt i=0; i<num_output_fields; i++) { 626 // Get output vector 627 ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 628 CeedChkBackend(ierr); 629 // Check if active output 630 if (vec == CEED_VECTOR_ACTIVE) { 631 ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 632 CeedChkBackend(ierr); 633 num_active_out += size; 634 } 635 } 636 impl->qf_num_active_out = num_active_out; 637 } 638 639 // Check sizes 640 if (!num_active_in || !num_active_out) 641 // LCOV_EXCL_START 642 return CeedError(ceed, CEED_ERROR_BACKEND, 643 "Cannot assemble QFunction without active inputs " 644 "and outputs"); 645 // LCOV_EXCL_STOP 646 647 // Setup Lvec 648 if (!lvec) { 649 ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, 650 &lvec); CeedChkBackend(ierr); 651 impl->qf_lvec = lvec; 652 } 653 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 654 655 // Build objects if needed 656 CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 657 if (build_objects) { 658 // Create output restriction 659 ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 660 num_active_in*num_active_out, 661 num_active_in*num_active_out*num_elem*Q, 662 strides, rstr); CeedChkBackend(ierr); 663 // Create assembled vector 664 ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out, 665 assembled); CeedChkBackend(ierr); 666 } 667 668 // Loop through elements 669 for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 670 // Input basis apply 671 ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 672 num_input_fields, blk_size, true, impl); 673 CeedChkBackend(ierr); 674 675 // Assemble QFunction 676 for (CeedInt in=0; in<num_active_in; in++) { 677 // Set Inputs 678 ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 679 if (num_active_in > 1) { 680 ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 681 0.0); CeedChkBackend(ierr); 682 } 683 // Set Outputs 684 for (CeedInt out=0; out<num_output_fields; out++) { 685 // Get output vector 686 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 687 CeedChkBackend(ierr); 688 // Check if active output 689 if (vec == CEED_VECTOR_ACTIVE) { 690 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 691 CEED_USE_POINTER, a); CeedChkBackend(ierr); 692 ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 693 CeedChkBackend(ierr); 694 a += size*Q*blk_size; // Advance the pointer by the size of the output 695 } 696 } 697 // Apply QFunction 698 ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 699 CeedChkBackend(ierr); 700 } 701 } 702 703 // Un-set output Qvecs to prevent accidental overwrite of Assembled 704 for (CeedInt out=0; out<num_output_fields; out++) { 705 // Get output vector 706 ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 707 CeedChkBackend(ierr); 708 // Check if active output 709 if (vec == CEED_VECTOR_ACTIVE) { 710 CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 711 NULL); CeedChkBackend(ierr); 712 } 713 } 714 715 // Restore input arrays 716 ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 717 op_input_fields, true, impl); CeedChkBackend(ierr); 718 719 // Output blocked restriction 720 ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr); 721 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 722 CeedElemRestriction blk_rstr = impl->qf_blk_rstr; 723 if (!impl->qf_blk_rstr) { 724 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 725 num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 726 strides, &blk_rstr); CeedChkBackend(ierr); 727 impl->qf_blk_rstr = blk_rstr; 728 } 729 ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled, 730 request); CeedChkBackend(ierr); 731 732 return CEED_ERROR_SUCCESS; 733 } 734 735 //------------------------------------------------------------------------------ 736 // Assemble Linear QFunction 737 //------------------------------------------------------------------------------ 738 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op, 739 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 740 return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled, 741 rstr, request); 742 } 743 744 //------------------------------------------------------------------------------ 745 // Update Assembled Linear QFunction 746 //------------------------------------------------------------------------------ 747 static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op, 748 CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 749 return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled, 750 &rstr, request); 751 } 752 753 //------------------------------------------------------------------------------ 754 // Operator Destroy 755 //------------------------------------------------------------------------------ 756 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 757 int ierr; 758 CeedOperator_Blocked *impl; 759 ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 760 761 for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) { 762 ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 763 ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr); 764 } 765 ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 766 ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr); 767 ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr); 768 ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr); 769 770 for (CeedInt i=0; i<impl->num_e_vecs_in; i++) { 771 ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 772 ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 773 } 774 ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 775 ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 776 777 for (CeedInt i=0; i<impl->num_e_vecs_out; i++) { 778 ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 779 ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 780 } 781 ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 782 ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 783 784 // QFunction assembly data 785 for (CeedInt i=0; i<impl->qf_num_active_in; i++) { 786 ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr); 787 } 788 ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr); 789 ierr = CeedVectorDestroy(&impl->qf_lvec); CeedChkBackend(ierr); 790 ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr); 791 792 ierr = CeedFree(&impl); CeedChkBackend(ierr); 793 return CEED_ERROR_SUCCESS; 794 } 795 796 //------------------------------------------------------------------------------ 797 // Operator Create 798 //------------------------------------------------------------------------------ 799 int CeedOperatorCreate_Blocked(CeedOperator op) { 800 int ierr; 801 Ceed ceed; 802 ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 803 CeedOperator_Blocked *impl; 804 805 ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 806 ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 807 808 ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 809 CeedOperatorLinearAssembleQFunction_Blocked); 810 CeedChkBackend(ierr); 811 ierr = CeedSetBackendFunction(ceed, "Operator", op, 812 "LinearAssembleQFunctionUpdate", 813 CeedOperatorLinearAssembleQFunctionUpdate_Blocked); 814 CeedChkBackend(ierr); 815 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 816 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr); 817 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 818 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr); 819 return CEED_ERROR_SUCCESS; 820 } 821 //------------------------------------------------------------------------------ 822