/// @file
/// MatCEED implementation

#include <ceed.h>
#include <ceed/backend.h>
#include <mat-ceed-impl.h>
#include <mat-ceed.h>
#include <petsc-ceed-utils.h>
#include <petsc-ceed.h>
#include <petscdmplex.h>
#include <stdlib.h>
#include <string.h>

// Log class and events shared by all MATCEED instances
PetscClassId  MATCEED_CLASSID;
PetscLogEvent MATCEED_MULT, MATCEED_MULT_TRANSPOSE;

/**
  @brief Register MATCEED log events.

  Not collective across MPI processes.

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedRegisterLogEvents() {
  // Static guard makes repeated registration a no-op
  // NOTE(review): guard is not thread-safe — confirm registration happens on a single thread
  static PetscBool registered = PETSC_FALSE;

  PetscFunctionBeginUser;
  if (registered) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscClassIdRegister("MATCEED", &MATCEED_CLASSID));
  PetscCall(PetscLogEventRegister("MATCEED Mult", MATCEED_CLASSID, &MATCEED_MULT));
  PetscCall(PetscLogEventRegister("MATCEED Mult Transpose", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
  registered = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
         The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
         The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.

  Collective across MPI processes.
  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear search; the list of previously assembled mats is expected to be short
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      // Convert CeedInt index arrays to PetscInt arrays for PETSc COO preallocation
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      free(rows_petsc);
      free(cols_petsc);
      // One shared CeedVector of COO values is reused for every point block diagonal assembly
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      // Track this Mat so symbolic assembly is only done once per target
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Read assembled values on the device for device-backed mat types
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag, if known, from the MATCEED to the assembled Mat
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Assemble inner `Mat` for diagonal `PC` operations

  Collective across MPI processes.

  @param[in]   mat_ceed      `MATCEED` to invert
  @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
  @param[out]  mat_inner     Inner `Mat` for diagonal operations

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));
  if (use_ceed_pbd) {
    // Check if COO pattern set; create the internal point block diagonal Mat lazily
    if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));

    // Assemble mat_assembled_pbd_internal
    PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
    if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
  } else {
    // Check if COO pattern set; create the internal fully assembled Mat lazily
    if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));

    // Assemble mat_assembled_full_internal
    PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
    if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Get `MATCEED` diagonal block for Jacobi.

  Collective across MPI processes.

  @param[in]   mat_ceed   `MATCEED` to invert
  @param[out]  mat_block  The diagonal block matrix

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
  Mat            mat_inner = NULL;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Assemble inner mat if needed
  PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));

  // Get block diagonal
  PetscCall(MatGetDiagonalBlock(mat_inner, mat_block));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Invert `MATCEED` diagonal block for Jacobi.

  Collective across MPI processes.

  @param[in]   mat_ceed  `MATCEED` to invert
  @param[out]  values    The block inverses in column major order

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
  Mat            mat_inner = NULL;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Assemble inner mat if needed
  PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));

  // Invert PB diagonal
  PetscCall(MatInvertBlockDiagonal(mat_inner, values));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Invert `MATCEED` variable diagonal block for Jacobi.

  Collective across MPI processes.
  @param[in]   mat_ceed     `MATCEED` to invert
  @param[in]   num_blocks   The number of blocks on the process
  @param[in]   block_sizes  The size of each block on the process
  @param[out]  values       The block inverses in column major order

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
  Mat            mat_inner = NULL;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Assemble inner mat if needed
  PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));

  // Invert PB diagonal
  PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief View `MATCEED`.

  Collective across MPI processes.

  @param[in]  mat_ceed  `MATCEED` to view
  @param[in]  viewer    The visualization context

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
  PetscBool         is_ascii;
  PetscViewerFormat format;
  PetscMPIInt       size;
  MatCeedContext    ctx;

  PetscFunctionBeginUser;
  // NOTE(review): viewer is validated before the NULL fallback below, so a NULL viewer
  // would already have errored here — confirm whether the `if (!viewer)` fallback is reachable
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCall(MatShellGetContext(mat_ceed, &ctx));
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));

  PetscCall(PetscViewerGetFormat(viewer, &format));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
  if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);

  // NOTE(review): is_ascii is computed but never consulted; the ASCII output below runs unconditionally
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
  {
    FILE *file;

    PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n Default COO MatType:%s\n", ctx->coo_mat_type));
    PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
    PetscCall(PetscViewerASCIIPrintf(viewer, " libCEED Operator:\n"));
    PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
    if (ctx->op_mult_transpose) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " libCEED Transpose Operator:\n"));
      PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

// -----------------------------------------------------------------------------
// MatCeed
// -----------------------------------------------------------------------------

/**
  @brief Create PETSc `Mat` from libCEED operators.

  Collective across MPI processes.

  @param[in]   dm_x               Input `DM`
  @param[in]   dm_y               Output `DM`
  @param[in]   op_mult            `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose  `CeedOperator` for transpose evaluation
  @param[out]  mat                New MatCeed

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedCreate(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
  PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
  VecType        vec_type;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatCeedRegisterLogEvents());

  // Collect context data
  PetscCall(DMGetVecType(dm_x, &vec_type));
  {
    Vec X;

    PetscCall(DMGetGlobalVector(dm_x, &X));
    PetscCall(VecGetSize(X, &X_g_size));
    PetscCall(VecGetLocalSize(X, &X_l_size));
    PetscCall(DMRestoreGlobalVector(dm_x, &X));
  }
  if (dm_y) {
    Vec Y;

    PetscCall(DMGetGlobalVector(dm_y, &Y));
    PetscCall(VecGetSize(Y, &Y_g_size));
    PetscCall(VecGetLocalSize(Y, &Y_l_size));
    PetscCall(DMRestoreGlobalVector(dm_y, &Y));
  } else {
    // Square operator: output space is the input space
    dm_y     = dm_x;
    Y_g_size = X_g_size;
    Y_l_size = X_l_size;
  }

  // Create context
  {
    Vec X_loc, Y_loc_transpose = NULL;

    PetscCall(DMCreateLocalVector(dm_x, &X_loc));
    PetscCall(VecZeroEntries(X_loc));
    if (op_mult_transpose) {
      PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
      PetscCall(VecZeroEntries(Y_loc_transpose));
    }
    PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE, &ctx));
    PetscCall(VecDestroy(&X_loc));
    PetscCall(VecDestroy(&Y_loc_transpose));
  }

  // Create mat
  PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
  PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
  // -- Set block and variable block sizes
  if (dm_x == dm_y) {
    MatType dm_mat_type, dm_mat_type_copy;
    Mat     temp_mat;

    // Temporarily force MATAIJ so DMCreateMatrix yields a Mat whose block sizes can be queried
    PetscCall(DMGetMatType(dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(dm_x, MATAIJ));
    PetscCall(DMCreateMatrix(dm_x, &temp_mat));
    PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));

    {
      PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
      const PetscInt *vblock_sizes;

      // -- Get block sizes
      PetscCall(MatGetBlockSize(temp_mat, &block_size));
      PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
      {
        PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};

        // Global max of the local variable block sizes across all ranks
        for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
        PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
        max_vblock_size = global_min_max[1];
      }

      // -- Copy block sizes
      if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
      if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));

      // -- Check libCEED compatibility
      {
        bool is_composite;

        ctx->is_ceed_pbd_valid  = PETSC_TRUE;
        ctx->is_ceed_vpbd_valid = PETSC_TRUE;
        PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
        if (is_composite) {
          CeedInt       num_sub_operators;
          CeedOperator *sub_operators;

          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
          for (CeedInt i = 0; i < num_sub_operators; i++) {
            CeedInt                  num_bases, num_comp;
            CeedBasis               *active_bases;
            CeedOperatorAssemblyData assembly_data;

            // PBD assembly requires a single active basis whose component count matches the block size
            PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
            PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
            PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
            if (num_bases > 1) {
              ctx->is_ceed_pbd_valid  = PETSC_FALSE;
              ctx->is_ceed_vpbd_valid = PETSC_FALSE;
            }
            if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
            if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
        } else {
          // LCOV_EXCL_START
          CeedInt                  num_bases, num_comp;
          CeedBasis               *active_bases;
          CeedOperatorAssemblyData assembly_data;

          PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
          PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
          PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
          if (num_bases > 1) {
            ctx->is_ceed_pbd_valid  = PETSC_FALSE;
            ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
          if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
          if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          // LCOV_EXCL_STOP
        }
        {
          PetscInt local_is_valid[2], global_is_valid[2];

          // Reduce validity flags so all ranks agree (global min: valid only if valid everywhere)
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_pbd_valid = global_is_valid[0];
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_vpbd_valid = global_is_valid[0];
        }
      }
    }
    PetscCall(MatDestroy(&temp_mat));
  }
  // -- Set internal mat type
  {
    VecType vec_type;
    MatType coo_mat_type;

    // Pick a device-backed COO mat type matching the local vector's backend
    PetscCall(VecGetType(ctx->X_loc, &vec_type));
    if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
    else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
    else coo_mat_type = MATAIJ;
    PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
  }
  // -- Set mat operations
  PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
  PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
  if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
  PetscCall(MatShellSetVecType(*mat, vec_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or
`MATCEED`. 444 445 Collective across MPI processes. 446 447 @param[in] mat_ceed `MATCEED` to copy from 448 @param[out] mat_other `MatShell` or `MATCEED` to copy into 449 450 @return An error code: 0 - success, otherwise - failure 451 **/ 452 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) { 453 PetscFunctionBeginUser; 454 PetscCall(MatCeedRegisterLogEvents()); 455 456 // Check type compatibility 457 { 458 PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE; 459 MatType mat_type_ceed, mat_type_other; 460 461 PetscCall(MatGetType(mat_ceed, &mat_type_ceed)); 462 PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed)); 463 PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED); 464 PetscCall(MatGetType(mat_other, &mat_type_other)); 465 PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed)); 466 PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matceed)); 467 PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL); 468 } 469 470 // Check dimension compatibility 471 { 472 PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size; 473 474 PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size)); 475 PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size)); 476 PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size)); 477 PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size)); 478 PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) && 479 (X_l_ceed_size == X_l_other_size), 480 PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, 481 "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT 482 "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT 483 ", 
%" PetscInt_FMT ")", 484 Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size); 485 } 486 487 // Convert 488 { 489 VecType vec_type; 490 MatCeedContext ctx; 491 492 PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED)); 493 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 494 PetscCall(MatCeedContextReference(ctx)); 495 PetscCall(MatShellSetContext(mat_other, ctx)); 496 PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy)); 497 PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed)); 498 PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed)); 499 if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed)); 500 PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed)); 501 PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed)); 502 PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed)); 503 PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed)); 504 { 505 PetscInt block_size; 506 507 PetscCall(MatGetBlockSize(mat_ceed, &block_size)); 508 if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size)); 509 } 510 { 511 PetscInt num_blocks; 512 const PetscInt *block_sizes; 513 514 PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes)); 515 if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes)); 516 } 517 PetscCall(DMGetVecType(ctx->dm_x, &vec_type)); 518 PetscCall(MatShellSetVecType(mat_other, vec_type)); 519 } 520 PetscFunctionReturn(PETSC_SUCCESS); 521 } 522 523 /** 524 @brief Setup a `Mat` with the same COO pattern as 
a `MatCEED`. 525 526 Collective across MPI processes. 527 528 @param[in] mat_ceed `MATCEED` 529 @param[out] mat_coo Sparse `Mat` with same COO pattern 530 531 @return An error code: 0 - success, otherwise - failure 532 **/ 533 PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) { 534 MatCeedContext ctx; 535 536 PetscFunctionBeginUser; 537 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 538 539 PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM"); 540 541 // Check cl mat type 542 { 543 PetscBool is_coo_mat_type_cl = PETSC_FALSE; 544 char coo_mat_type_cl[64]; 545 546 // Check for specific CL coo mat type for this Mat 547 { 548 const char *mat_ceed_prefix = NULL; 549 550 PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix)); 551 PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL); 552 PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl, 553 sizeof(coo_mat_type_cl), &is_coo_mat_type_cl)); 554 PetscOptionsEnd(); 555 if (is_coo_mat_type_cl) { 556 PetscCall(PetscFree(ctx->coo_mat_type)); 557 PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type)); 558 } 559 } 560 } 561 562 // Create sparse matrix 563 { 564 MatType dm_mat_type, dm_mat_type_copy; 565 566 PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type)); 567 PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy)); 568 PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type)); 569 PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo)); 570 PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy)); 571 PetscCall(PetscFree(dm_mat_type_copy)); 572 } 573 PetscFunctionReturn(PETSC_SUCCESS); 574 } 575 576 /** 577 @brief Setup the COO preallocation `MATCEED` into a `MATAIJ` or similar. 578 The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail. 
579 580 Collective across MPI processes. 581 582 @param[in] mat_ceed `MATCEED` to assemble 583 @param[in,out] mat_coo `MATAIJ` or similar to assemble into 584 585 @return An error code: 0 - success, otherwise - failure 586 **/ 587 PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) { 588 MatCeedContext ctx; 589 590 PetscFunctionBeginUser; 591 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 592 593 { 594 PetscInt *rows_petsc = NULL, *cols_petsc = NULL; 595 CeedInt *rows_ceed, *cols_ceed; 596 PetscCount num_entries; 597 PetscLogStage stage_amg_setup; 598 599 // -- Assemble sparsity pattern if mat hasn't been assembled before 600 PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup)); 601 if (stage_amg_setup == -1) { 602 PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup)); 603 } 604 PetscCall(PetscLogStagePush(stage_amg_setup)); 605 PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed)); 606 PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc)); 607 PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc)); 608 PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc)); 609 free(rows_petsc); 610 free(cols_petsc); 611 if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full)); 612 PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full)); 613 ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo; 614 PetscCall(PetscLogStagePop()); 615 } 616 PetscFunctionReturn(PETSC_SUCCESS); 617 } 618 619 /** 620 @brief Assemble a `MATCEED` into a `MATAIJ` or similar. 621 The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`. 622 The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail. 
623 624 Collective across MPI processes. 625 626 @param[in] mat_ceed `MATCEED` to assemble 627 @param[in,out] mat_coo `MATAIJ` or similar to assemble into 628 629 @return An error code: 0 - success, otherwise - failure 630 **/ 631 PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) { 632 MatCeedContext ctx; 633 634 PetscFunctionBeginUser; 635 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 636 637 // Set COO pattern if needed 638 { 639 CeedInt index = -1; 640 641 for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) { 642 if (ctx->mats_assembled_full[i] == mat_coo) index = i; 643 } 644 if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo)); 645 } 646 647 // Assemble mat_ceed 648 PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY)); 649 { 650 const CeedScalar *values; 651 MatType mat_type; 652 CeedMemType mem_type = CEED_MEM_HOST; 653 PetscBool is_spd, is_spd_known; 654 655 PetscCall(MatGetType(mat_coo, &mat_type)); 656 if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE; 657 else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE; 658 else mem_type = CEED_MEM_HOST; 659 660 PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full)); 661 PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values)); 662 PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES)); 663 PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd)); 664 if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd)); 665 PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values)); 666 } 667 PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY)); 668 PetscFunctionReturn(PETSC_SUCCESS); 669 } 670 671 /** 672 @brief Set the current value of a context field for a `MatCEED`. 673 674 Not collective across MPI processes. 
675 676 @param[in,out] mat `MatCEED` 677 @param[in] name Name of the context field 678 @param[in] value New context field value 679 680 @return An error code: 0 - success, otherwise - failure 681 **/ 682 PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) { 683 PetscBool was_updated = PETSC_FALSE; 684 MatCeedContext ctx; 685 686 PetscFunctionBeginUser; 687 PetscCall(MatShellGetContext(mat, &ctx)); 688 { 689 CeedContextFieldLabel label = NULL; 690 691 PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label)); 692 if (label) { 693 PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value)); 694 was_updated = PETSC_TRUE; 695 } 696 if (ctx->op_mult_transpose) { 697 label = NULL; 698 PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label)); 699 if (label) { 700 PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value)); 701 was_updated = PETSC_TRUE; 702 } 703 } 704 } 705 if (was_updated) { 706 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 707 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 708 } 709 PetscFunctionReturn(PETSC_SUCCESS); 710 } 711 712 /** 713 @brief Set the current `PetscReal` value of a context field for a `MatCEED`. 714 715 Not collective across MPI processes. 716 717 @param[in,out] mat `MatCEED` 718 @param[in] name Name of the context field 719 @param[in] value New context field value 720 721 @return An error code: 0 - success, otherwise - failure 722 **/ 723 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) { 724 double value_double = value; 725 726 PetscFunctionBeginUser; 727 PetscCall(MatCeedSetContextDouble(mat, name, value_double)); 728 PetscFunctionReturn(PETSC_SUCCESS); 729 } 730 731 /** 732 @brief Get the current value of a context field for a `MatCEED`. 733 734 Not collective across MPI processes. 
735 736 @param[in] mat `MatCEED` 737 @param[in] name Name of the context field 738 @param[out] value Current context field value 739 740 @return An error code: 0 - success, otherwise - failure 741 **/ 742 PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) { 743 MatCeedContext ctx; 744 745 PetscFunctionBeginUser; 746 PetscCall(MatShellGetContext(mat, &ctx)); 747 { 748 CeedContextFieldLabel label = NULL; 749 CeedOperator op = ctx->op_mult; 750 751 PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label)); 752 if (!label && ctx->op_mult_transpose) { 753 op = ctx->op_mult_transpose; 754 PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label)); 755 } 756 if (label) { 757 PetscSizeT num_values; 758 const double *values_ceed; 759 760 PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed)); 761 *value = values_ceed[0]; 762 PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed)); 763 } 764 } 765 PetscFunctionReturn(PETSC_SUCCESS); 766 } 767 768 /** 769 @brief Get the current `PetscReal` value of a context field for a `MatCEED`. 770 771 Not collective across MPI processes. 772 773 @param[in] mat `MatCEED` 774 @param[in] name Name of the context field 775 @param[out] value Current context field value 776 777 @return An error code: 0 - success, otherwise - failure 778 **/ 779 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) { 780 double value_double; 781 782 PetscFunctionBeginUser; 783 PetscCall(MatCeedGetContextDouble(mat, name, &value_double)); 784 *value = value_double; 785 PetscFunctionReturn(PETSC_SUCCESS); 786 } 787 788 /** 789 @brief Set user context for a `MATCEED`. 790 791 Collective across MPI processes. 
792 793 @param[in,out] mat `MATCEED` 794 @param[in] f The context destroy function, or NULL 795 @param[in] ctx User context, or NULL to unset 796 797 @return An error code: 0 - success, otherwise - failure 798 **/ 799 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) { 800 PetscContainer user_ctx = NULL; 801 802 PetscFunctionBeginUser; 803 if (ctx) { 804 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx)); 805 PetscCall(PetscContainerSetPointer(user_ctx, ctx)); 806 PetscCall(PetscContainerSetUserDestroy(user_ctx, f)); 807 } 808 PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx)); 809 PetscCall(PetscContainerDestroy(&user_ctx)); 810 PetscFunctionReturn(PETSC_SUCCESS); 811 } 812 813 /** 814 @brief Retrieve the user context for a `MATCEED`. 815 816 Collective across MPI processes. 817 818 @param[in,out] mat `MATCEED` 819 @param[in] ctx User context 820 821 @return An error code: 0 - success, otherwise - failure 822 **/ 823 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) { 824 PetscContainer user_ctx; 825 826 PetscFunctionBeginUser; 827 PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx)); 828 if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx)); 829 else *(void **)ctx = NULL; 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 /** 833 @brief Set a user defined matrix operation for a `MATCEED` matrix. 834 835 Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by 836 `MatCeedSetContext()`. 837 838 Collective across MPI processes. 
  @param[in,out]  mat  `MATCEED`
  @param[in]      op   Name of the `MatOperation`
  @param[in]      g    Function that provides the operation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
  PetscFunctionBeginUser;
  // MATCEED is a MATSHELL underneath, so defer to MatShellSetOperation()
  PetscCall(MatShellSetOperation(mat, op, g));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Sets the default COO matrix type as a string from the `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat   `MATCEED`
  @param[in]      type  COO `MatType` to set

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Check if same; if the new type matches the current one there is nothing to do
  {
    size_t    len_old, len_new;
    PetscBool is_same = PETSC_FALSE;

    PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
    PetscCall(PetscStrlen(type, &len_new));
    if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
    if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
  }
  // Clean up old mats in different format
  // The internally assembled mats were preallocated for the old type, so drop them and
  // compact the bookkeeping arrays over the removed entry
  // LCOV_EXCL_START
  if (ctx->mat_assembled_full_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
        // Shift later entries down one slot to fill the hole
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
          ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
        }
        ctx->num_mats_assembled_full--;
        // Note: we'll realloc this array again, so no need to shrink the allocation
        // MatDestroy() NULLs the pointer, so later iterations cannot match again
        PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
      }
    }
  }
  if (ctx->mat_assembled_pbd_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
        // Shift later entries down one slot to fill the hole
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
          ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
        }
        // Note: we'll realloc this array again, so no need to shrink the allocation
        ctx->num_mats_assembled_pbd--;
        PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
      }
    }
  }
  // Record the new COO matrix type
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
  // LCOV_EXCL_STOP
}

/**
  @brief Gets the default COO matrix type as a string from the `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat   `MATCEED`
  @param[out]     type  COO `MatType`

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Borrowed pointer into the context; the caller must not free it
  *type = ctx->coo_mat_type;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.

  Not collective across MPI processes.
932 933 @param[in,out] mat `MATCEED` 934 @param[in] X_loc Input PETSc local vector, or NULL 935 @param[in] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 936 937 @return An error code: 0 - success, otherwise - failure 938 **/ 939 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) { 940 MatCeedContext ctx; 941 942 PetscFunctionBeginUser; 943 PetscCall(MatShellGetContext(mat, &ctx)); 944 if (X_loc) { 945 PetscInt len_old, len_new; 946 947 PetscCall(VecGetSize(ctx->X_loc, &len_old)); 948 PetscCall(VecGetSize(X_loc, &len_new)); 949 PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT, 950 len_new, len_old); 951 PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc)); 952 } 953 if (Y_loc_transpose) { 954 PetscInt len_old, len_new; 955 956 PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old)); 957 PetscCall(VecGetSize(Y_loc_transpose, &len_new)); 958 PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, 959 "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old); 960 PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose)); 961 } 962 PetscFunctionReturn(PETSC_SUCCESS); 963 } 964 965 /** 966 @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 967 968 Not collective across MPI processes. 
969 970 @param[in,out] mat `MATCEED` 971 @param[out] X_loc Input PETSc local vector, or NULL 972 @param[out] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 973 974 @return An error code: 0 - success, otherwise - failure 975 **/ 976 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) { 977 MatCeedContext ctx; 978 979 PetscFunctionBeginUser; 980 PetscCall(MatShellGetContext(mat, &ctx)); 981 if (X_loc) { 982 *X_loc = NULL; 983 PetscCall(VecReferenceCopy(ctx->X_loc, X_loc)); 984 } 985 if (Y_loc_transpose) { 986 *Y_loc_transpose = NULL; 987 PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose)); 988 } 989 PetscFunctionReturn(PETSC_SUCCESS); 990 } 991 992 /** 993 @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 994 995 Not collective across MPI processes. 996 997 @param[in,out] mat MatCeed 998 @param[out] X_loc Input PETSc local vector, or NULL 999 @param[out] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 1000 1001 @return An error code: 0 - success, otherwise - failure 1002 **/ 1003 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) { 1004 PetscFunctionBeginUser; 1005 if (X_loc) PetscCall(VecDestroy(X_loc)); 1006 if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose)); 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 /** 1011 @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 1012 1013 Not collective across MPI processes. 
1014 1015 @param[in,out] mat MatCeed 1016 @param[out] op_mult libCEED `CeedOperator` for `MatMult()`, or NULL 1017 @param[out] op_mult_transpose libCEED `CeedOperator` for `MatMultTranspose()`, or NULL 1018 1019 @return An error code: 0 - success, otherwise - failure 1020 **/ 1021 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) { 1022 MatCeedContext ctx; 1023 1024 PetscFunctionBeginUser; 1025 PetscCall(MatShellGetContext(mat, &ctx)); 1026 if (op_mult) { 1027 *op_mult = NULL; 1028 PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult)); 1029 } 1030 if (op_mult_transpose) { 1031 *op_mult_transpose = NULL; 1032 PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose)); 1033 } 1034 PetscFunctionReturn(PETSC_SUCCESS); 1035 } 1036 1037 /** 1038 @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 1039 1040 Not collective across MPI processes. 1041 1042 @param[in,out] mat MatCeed 1043 @param[out] op_mult libCEED `CeedOperator` for `MatMult()`, or NULL 1044 @param[out] op_mult_transpose libCEED `CeedOperator` for `MatMultTranspose()`, or NULL 1045 1046 @return An error code: 0 - success, otherwise - failure 1047 **/ 1048 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) { 1049 MatCeedContext ctx; 1050 1051 PetscFunctionBeginUser; 1052 PetscCall(MatShellGetContext(mat, &ctx)); 1053 if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult)); 1054 if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose)); 1055 PetscFunctionReturn(PETSC_SUCCESS); 1056 } 1057 1058 /** 1059 @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators. 1060 1061 Not collective across MPI processes. 
1062 1063 @param[in,out] mat MatCeed 1064 @param[out] log_event_mult `PetscLogEvent` for forward evaluation, or NULL 1065 @param[out] log_event_mult_transpose `PetscLogEvent` for transpose evaluation, or NULL 1066 1067 @return An error code: 0 - success, otherwise - failure 1068 **/ 1069 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) { 1070 MatCeedContext ctx; 1071 1072 PetscFunctionBeginUser; 1073 PetscCall(MatShellGetContext(mat, &ctx)); 1074 if (log_event_mult) ctx->log_event_mult = log_event_mult; 1075 if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose; 1076 PetscFunctionReturn(PETSC_SUCCESS); 1077 } 1078 1079 /** 1080 @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators. 1081 1082 Not collective across MPI processes. 1083 1084 @param[in,out] mat MatCeed 1085 @param[out] log_event_mult `PetscLogEvent` for forward evaluation, or NULL 1086 @param[out] log_event_mult_transpose `PetscLogEvent` for transpose evaluation, or NULL 1087 1088 @return An error code: 0 - success, otherwise - failure 1089 **/ 1090 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) { 1091 MatCeedContext ctx; 1092 1093 PetscFunctionBeginUser; 1094 PetscCall(MatShellGetContext(mat, &ctx)); 1095 if (log_event_mult) *log_event_mult = ctx->log_event_mult; 1096 if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose; 1097 PetscFunctionReturn(PETSC_SUCCESS); 1098 } 1099 1100 // ----------------------------------------------------------------------------- 1101 // Operator context data 1102 // ----------------------------------------------------------------------------- 1103 1104 /** 1105 @brief Setup context data for operator application. 1106 1107 Collective across MPI processes. 
1108 1109 @param[in] dm_x Input `DM` 1110 @param[in] dm_y Output `DM` 1111 @param[in] X_loc Input PETSc local vector, or NULL 1112 @param[in] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 1113 @param[in] op_mult `CeedOperator` for forward evaluation 1114 @param[in] op_mult_transpose `CeedOperator` for transpose evaluation 1115 @param[in] log_event_mult `PetscLogEvent` for forward evaluation 1116 @param[in] log_event_mult_transpose `PetscLogEvent` for transpose evaluation 1117 @param[out] ctx Context data for operator evaluation 1118 1119 @return An error code: 0 - success, otherwise - failure 1120 **/ 1121 PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose, 1122 PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, MatCeedContext *ctx) { 1123 CeedSize x_loc_len, y_loc_len; 1124 1125 PetscFunctionBeginUser; 1126 1127 // Allocate 1128 PetscCall(PetscNew(ctx)); 1129 (*ctx)->ref_count = 1; 1130 1131 // Logging 1132 (*ctx)->log_event_mult = log_event_mult; 1133 (*ctx)->log_event_mult_transpose = log_event_mult_transpose; 1134 1135 // PETSc objects 1136 PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x)); 1137 PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y)); 1138 if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc)); 1139 if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose)); 1140 1141 // Memtype 1142 { 1143 const PetscScalar *x; 1144 Vec X; 1145 1146 PetscCall(DMGetLocalVector(dm_x, &X)); 1147 PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type)); 1148 PetscCall(VecRestoreArrayReadAndMemType(X, &x)); 1149 PetscCall(DMRestoreLocalVector(dm_x, &X)); 1150 } 1151 1152 // libCEED objects 1153 PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, 1154 "retrieving Ceed context object failed"); 1155 PetscCallCeed((*ctx)->ceed, 
CeedReference((*ctx)->ceed)); 1156 PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len)); 1157 PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult)); 1158 if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose)); 1159 PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc)); 1160 PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc)); 1161 1162 // Flop counting 1163 { 1164 CeedSize ceed_flops_estimate = 0; 1165 1166 PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate)); 1167 (*ctx)->flops_mult = ceed_flops_estimate; 1168 if (op_mult_transpose) { 1169 PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate)); 1170 (*ctx)->flops_mult_transpose = ceed_flops_estimate; 1171 } 1172 } 1173 1174 // Check sizes 1175 if (x_loc_len > 0 || y_loc_len > 0) { 1176 CeedSize ctx_x_loc_len, ctx_y_loc_len; 1177 PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len; 1178 Vec dm_X_loc, dm_Y_loc; 1179 1180 // -- Input 1181 PetscCall(DMGetLocalVector(dm_x, &dm_X_loc)); 1182 PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len)); 1183 PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc)); 1184 PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len)); 1185 if (X_loc) { 1186 PetscCall(VecGetLocalSize(X_loc, &X_loc_len)); 1187 PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, 1188 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len); 1189 } 1190 PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", 1191 x_loc_len, dm_x_loc_len); 1192 PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op 
dimensions (%" CeedSize_FMT ")", 1193 x_loc_len, ctx_x_loc_len); 1194 1195 // -- Output 1196 PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc)); 1197 PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len)); 1198 PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc)); 1199 PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len)); 1200 PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", 1201 ctx_y_loc_len, dm_y_loc_len); 1202 1203 // -- Transpose 1204 if (Y_loc_transpose) { 1205 PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len)); 1206 PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, 1207 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len); 1208 } 1209 } 1210 PetscFunctionReturn(PETSC_SUCCESS); 1211 } 1212 1213 /** 1214 @brief Increment reference counter for `MATCEED` context. 1215 1216 Not collective across MPI processes. 1217 1218 @param[in,out] ctx Context data 1219 1220 @return An error code: 0 - success, otherwise - failure 1221 **/ 1222 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) { 1223 PetscFunctionBeginUser; 1224 ctx->ref_count++; 1225 PetscFunctionReturn(PETSC_SUCCESS); 1226 } 1227 1228 /** 1229 @brief Copy reference for `MATCEED`. 1230 Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`. 1231 1232 Not collective across MPI processes. 
  @param[in]   ctx       Context data
  @param[out]  ctx_copy  Copy of pointer to context data

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
  PetscFunctionBeginUser;
  // Take a new reference to ctx, then release whatever *ctx_copy previously pointed at
  PetscCall(MatCeedContextReference(ctx));
  PetscCall(MatCeedContextDestroy(*ctx_copy));
  *ctx_copy = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Destroy context data for operator application.

  Collective across MPI processes.

  @param[in,out]  ctx  Context data for operator evaluation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  // Tolerates NULL; only the final reference actually tears the context down
  if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);

  // PETSc objects
  PetscCall(DMDestroy(&ctx->dm_x));
  PetscCall(DMDestroy(&ctx->dm_y));
  PetscCall(VecDestroy(&ctx->X_loc));
  PetscCall(VecDestroy(&ctx->Y_loc_transpose));
  PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
  PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
  PetscCall(PetscFree(ctx->coo_mat_type));
  PetscCall(PetscFree(ctx->mats_assembled_full));
  PetscCall(PetscFree(ctx->mats_assembled_pbd));

  // libCEED objects; all must be released before the Ceed itself is destroyed below
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
  PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");

  // Deallocate
  // NOTE(review): the flag is written immediately before the memory is freed, so any later
  // read of it through a stale pointer is use-after-free — confirm how stale refs consume this
  ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
  PetscCall(PetscFree(ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Compute the diagonal of an operator via libCEED.

  Collective across MPI processes.

  @param[in]   A  `MATCEED`
  @param[out]  D  Vector holding operator diagonal

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
  PetscMemType   mem_type;
  Vec            D_loc;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));

  // Place PETSc vector in libCEED vector
  PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
  PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));

  // Compute Diagonal
  PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));

  // Restore PETSc vector
  PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));

  // Local-to-Global; accumulate shared-dof contributions across ranks
  PetscCall(VecZeroEntries(D));
  PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
  PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Compute `A X = Y` for a `MATCEED`.

  Collective across MPI processes.
1325 1326 @param[in] A `MATCEED` 1327 @param[in] X Input PETSc vector 1328 @param[out] Y Output PETSc vector 1329 1330 @return An error code: 0 - success, otherwise - failure 1331 **/ 1332 PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) { 1333 MatCeedContext ctx; 1334 1335 PetscFunctionBeginUser; 1336 PetscCall(MatShellGetContext(A, &ctx)); 1337 PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, 0)); 1338 1339 { 1340 PetscMemType x_mem_type, y_mem_type; 1341 Vec X_loc = ctx->X_loc, Y_loc; 1342 1343 // Get local vectors 1344 if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc)); 1345 PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc)); 1346 1347 // Global-to-local 1348 PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc)); 1349 1350 // Setup libCEED vectors 1351 PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc)); 1352 PetscCall(VecZeroEntries(Y_loc)); 1353 PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc)); 1354 1355 // Apply libCEED operator 1356 PetscCall(PetscLogGpuTimeBegin()); 1357 PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE)); 1358 PetscCall(PetscLogGpuTimeEnd()); 1359 1360 // Restore PETSc vectors 1361 PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc)); 1362 PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc)); 1363 1364 // Local-to-global 1365 PetscCall(VecZeroEntries(Y)); 1366 PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y)); 1367 1368 // Restore local vectors, as needed 1369 if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc)); 1370 PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc)); 1371 } 1372 1373 // Log flops 1374 if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult)); 1375 else PetscCall(PetscLogFlops(ctx->flops_mult)); 1376 1377 PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, 0)); 1378 PetscFunctionReturn(PETSC_SUCCESS); 1379 } 1380 1381 /** 1382 @brief Compute `A^T Y = 
X` for a `MATCEED`. 1383 1384 Collective across MPI processes. 1385 1386 @param[in] A `MATCEED` 1387 @param[in] Y Input PETSc vector 1388 @param[out] X Output PETSc vector 1389 1390 @return An error code: 0 - success, otherwise - failure 1391 **/ 1392 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) { 1393 MatCeedContext ctx; 1394 1395 PetscFunctionBeginUser; 1396 PetscCall(MatShellGetContext(A, &ctx)); 1397 PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, 0)); 1398 1399 { 1400 PetscMemType x_mem_type, y_mem_type; 1401 Vec X_loc, Y_loc = ctx->Y_loc_transpose; 1402 1403 // Get local vectors 1404 if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc)); 1405 PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc)); 1406 1407 // Global-to-local 1408 PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc)); 1409 1410 // Setup libCEED vectors 1411 PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc)); 1412 PetscCall(VecZeroEntries(X_loc)); 1413 PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc)); 1414 1415 // Apply libCEED operator 1416 PetscCall(PetscLogGpuTimeBegin()); 1417 PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE)); 1418 PetscCall(PetscLogGpuTimeEnd()); 1419 1420 // Restore PETSc vectors 1421 PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc)); 1422 PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc)); 1423 1424 // Local-to-global 1425 PetscCall(VecZeroEntries(X)); 1426 PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X)); 1427 1428 // Restore local vectors, as needed 1429 if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc)); 1430 PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc)); 1431 } 1432 1433 // Log flops 1434 if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose)); 1435 else PetscCall(PetscLogFlops(ctx->flops_mult_transpose)); 1436 1437 
PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, 0)); 1438 PetscFunctionReturn(PETSC_SUCCESS); 1439 } 1440