1c8564c30SJames Wright /// @file 2c8564c30SJames Wright /// MatCeed and it's related operators 3c8564c30SJames Wright 4d0593705SJames Wright #include <ceed-utils.h> 5c8564c30SJames Wright #include <ceed.h> 6c8564c30SJames Wright #include <ceed/backend.h> 7c8564c30SJames Wright #include <mat-ceed-impl.h> 8c8564c30SJames Wright #include <mat-ceed.h> 9c8564c30SJames Wright #include <petscdmplex.h> 10c8564c30SJames Wright #include <stdlib.h> 11c8564c30SJames Wright #include <string.h> 12c8564c30SJames Wright 13c8564c30SJames Wright PetscClassId MATCEED_CLASSID; 14c8564c30SJames Wright PetscLogEvent MATCEED_MULT, MATCEED_MULT_TRANSPOSE; 15c8564c30SJames Wright 16c8564c30SJames Wright /** 17c8564c30SJames Wright @brief Register MATCEED log events. 18c8564c30SJames Wright 19c8564c30SJames Wright Not collective across MPI processes. 20c8564c30SJames Wright 21c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 22c8564c30SJames Wright **/ 23c8564c30SJames Wright static PetscErrorCode MatCeedRegisterLogEvents() { 24c8564c30SJames Wright static bool registered = false; 25c8564c30SJames Wright 26c8564c30SJames Wright PetscFunctionBeginUser; 27c8564c30SJames Wright if (registered) PetscFunctionReturn(PETSC_SUCCESS); 28c8564c30SJames Wright PetscCall(PetscClassIdRegister("MATCEED", &MATCEED_CLASSID)); 29c8564c30SJames Wright PetscCall(PetscLogEventRegister("MATCEED Mult", MATCEED_CLASSID, &MATCEED_MULT)); 30c8564c30SJames Wright PetscCall(PetscLogEventRegister("MATCEED Mult Transpose", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE)); 31c8564c30SJames Wright registered = true; 32c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 33c8564c30SJames Wright } 34c8564c30SJames Wright 35c8564c30SJames Wright /** 36c8564c30SJames Wright @brief Setup inner `Mat` for `PC` operations not directly supported by libCEED. 37c8564c30SJames Wright 38c8564c30SJames Wright Collective across MPI processes. 39c8564c30SJames Wright 40c8564c30SJames Wright @param[in] mat_ceed `MATCEED` to setup 41c8564c30SJames Wright @param[out] mat_inner Inner `Mat` 42c8564c30SJames Wright 43c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 44c8564c30SJames Wright **/ 45c8564c30SJames Wright static PetscErrorCode MatCeedSetupInnerMat(Mat mat_ceed, Mat *mat_inner) { 46c8564c30SJames Wright MatCeedContext ctx; 47c8564c30SJames Wright 48c8564c30SJames Wright PetscFunctionBeginUser; 49c8564c30SJames Wright PetscCall(MatShellGetContext(mat_ceed, &ctx)); 50c8564c30SJames Wright 51c8564c30SJames Wright PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "PC only supported for MATCEED on a single DM"); 52c8564c30SJames Wright 53c8564c30SJames Wright // Check cl mat type 54c8564c30SJames Wright { 55c8564c30SJames Wright PetscBool is_internal_mat_type_cl = PETSC_FALSE; 56c8564c30SJames Wright char internal_mat_type_cl[64]; 57c8564c30SJames Wright 58c8564c30SJames Wright // Check for specific CL inner mat type for this Mat 59c8564c30SJames Wright { 60c8564c30SJames Wright const char *mat_ceed_prefix = NULL; 61c8564c30SJames Wright 62c8564c30SJames Wright PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix)); 63c8564c30SJames Wright PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL); 64c8564c30SJames Wright PetscCall(PetscOptionsFList("-ceed_inner_mat_type", "MATCEED inner assembled MatType for PC support", NULL, MatList, internal_mat_type_cl, 65c8564c30SJames Wright internal_mat_type_cl, sizeof(internal_mat_type_cl), &is_internal_mat_type_cl)); 66c8564c30SJames Wright PetscOptionsEnd(); 67c8564c30SJames Wright if (is_internal_mat_type_cl) { 68c8564c30SJames Wright PetscCall(PetscFree(ctx->internal_mat_type)); 69c8564c30SJames Wright PetscCall(PetscStrallocpy(internal_mat_type_cl, &ctx->internal_mat_type)); 70c8564c30SJames Wright } 71c8564c30SJames Wright } 72c8564c30SJames Wright } 73c8564c30SJames Wright 74c8564c30SJames Wright // Create sparse matrix 75c8564c30SJames Wright { 76c8564c30SJames Wright MatType dm_mat_type, dm_mat_type_copy; 77c8564c30SJames Wright 78c8564c30SJames Wright PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type)); 79c8564c30SJames Wright PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy)); 80c8564c30SJames Wright PetscCall(DMSetMatType(ctx->dm_x, ctx->internal_mat_type)); 81c8564c30SJames Wright PetscCall(DMCreateMatrix(ctx->dm_x, mat_inner)); 82c8564c30SJames Wright PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy)); 83c8564c30SJames Wright PetscCall(PetscFree(dm_mat_type_copy)); 84c8564c30SJames Wright } 85c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 86c8564c30SJames Wright } 87c8564c30SJames Wright 88c8564c30SJames Wright /** 89c8564c30SJames Wright @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar. 90c8564c30SJames Wright The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`. 91c8564c30SJames Wright The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail. 92c8564c30SJames Wright 93c8564c30SJames Wright Collective across MPI processes. 94c8564c30SJames Wright 95c8564c30SJames Wright @param[in] mat_ceed `MATCEED` to assemble 96c8564c30SJames Wright @param[in,out] mat_coo `MATAIJ` or similar to assemble into 97c8564c30SJames Wright 98c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 99c8564c30SJames Wright **/ 100c8564c30SJames Wright static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) { 101c8564c30SJames Wright MatCeedContext ctx; 102c8564c30SJames Wright 103c8564c30SJames Wright PetscFunctionBeginUser; 104c8564c30SJames Wright PetscCall(MatShellGetContext(mat_ceed, &ctx)); 105c8564c30SJames Wright 106c8564c30SJames Wright // Check if COO pattern set 107c8564c30SJames Wright { 108c8564c30SJames Wright PetscInt index = -1; 109c8564c30SJames Wright 110c8564c30SJames Wright for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) { 111c8564c30SJames Wright if (ctx->mats_assembled_pbd[i] == mat_coo) index = i; 112c8564c30SJames Wright } 113c8564c30SJames Wright if (index == -1) { 114c8564c30SJames Wright PetscInt *rows_petsc = NULL, *cols_petsc = NULL; 115c8564c30SJames Wright CeedInt *rows_ceed, *cols_ceed; 116c8564c30SJames Wright PetscCount num_entries; 117c8564c30SJames Wright PetscLogStage stage_amg_setup; 118c8564c30SJames Wright 119c8564c30SJames Wright // -- Assemble sparsity pattern if mat hasn't been assembled before 120c8564c30SJames Wright PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup)); 121c8564c30SJames Wright if (stage_amg_setup == -1) { 122c8564c30SJames Wright PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup)); 123c8564c30SJames Wright } 124c8564c30SJames Wright PetscCall(PetscLogStagePush(stage_amg_setup)); 12554831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed)); 126d0593705SJames Wright PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc)); 127d0593705SJames Wright PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc)); 128c8564c30SJames Wright PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc)); 129c8564c30SJames Wright free(rows_petsc); 130c8564c30SJames Wright free(cols_petsc); 13154831c5fSJames Wright if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd)); 132c8564c30SJames Wright PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd)); 133c8564c30SJames Wright ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo; 134c8564c30SJames Wright PetscCall(PetscLogStagePop()); 135c8564c30SJames Wright } 136c8564c30SJames Wright } 137c8564c30SJames Wright 138c8564c30SJames Wright // Assemble mat_ceed 139c8564c30SJames Wright PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY)); 140c8564c30SJames Wright { 141c8564c30SJames Wright const CeedScalar *values; 142c8564c30SJames Wright MatType mat_type; 143c8564c30SJames Wright CeedMemType mem_type = CEED_MEM_HOST; 144c8564c30SJames Wright PetscBool is_spd, is_spd_known; 145c8564c30SJames Wright 146c8564c30SJames Wright PetscCall(MatGetType(mat_coo, &mat_type)); 147c8564c30SJames Wright if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE; 148c8564c30SJames Wright else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE; 149c8564c30SJames Wright else mem_type = CEED_MEM_HOST; 150c8564c30SJames Wright 15154831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE)); 15254831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values)); 153c8564c30SJames Wright PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES)); 154c8564c30SJames Wright PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd)); 155c8564c30SJames Wright if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd)); 15654831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values)); 157c8564c30SJames Wright } 158c8564c30SJames Wright PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY)); 159c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 160c8564c30SJames Wright } 161c8564c30SJames Wright 162c8564c30SJames Wright /** 163c8564c30SJames Wright @brief Assemble inner `Mat` for diagonal `PC` operations 164c8564c30SJames Wright 165c8564c30SJames Wright Collective across MPI processes. 166c8564c30SJames Wright 167c8564c30SJames Wright @param[in] mat_ceed `MATCEED` to invert 168c8564c30SJames Wright @param[in] use_ceed_pbd Boolean flag to use libCEED PBD assembly 169c8564c30SJames Wright @param[out] mat_inner Inner `Mat` for diagonal operations 170c8564c30SJames Wright 171c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 172c8564c30SJames Wright **/ 173c8564c30SJames Wright static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) { 174c8564c30SJames Wright MatCeedContext ctx; 175c8564c30SJames Wright 176c8564c30SJames Wright PetscFunctionBeginUser; 177c8564c30SJames Wright PetscCall(MatShellGetContext(mat_ceed, &ctx)); 178c8564c30SJames Wright if (use_ceed_pbd) { 179c8564c30SJames Wright // Check if COO pattern set 180c8564c30SJames Wright if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_pbd_internal)); 181c8564c30SJames Wright 182c8564c30SJames Wright // Assemble mat_assembled_full_internal 183c8564c30SJames Wright PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal)); 184c8564c30SJames Wright if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal; 185c8564c30SJames Wright } else { 186c8564c30SJames Wright // Check if COO pattern set 187c8564c30SJames Wright if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_full_internal)); 188c8564c30SJames Wright 189c8564c30SJames Wright // Assemble mat_assembled_full_internal 190c8564c30SJames Wright PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal)); 191c8564c30SJames Wright if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal; 192c8564c30SJames Wright } 193c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 194c8564c30SJames Wright } 195c8564c30SJames Wright 196c8564c30SJames Wright /** 197c8564c30SJames Wright @brief Get `MATCEED` diagonal block for Jacobi. 198c8564c30SJames Wright 199c8564c30SJames Wright Collective across MPI processes. 200c8564c30SJames Wright 201c8564c30SJames Wright @param[in] mat_ceed `MATCEED` to invert 202c8564c30SJames Wright @param[out] mat_block The diagonal block matrix 203c8564c30SJames Wright 204c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 205c8564c30SJames Wright **/ 206c8564c30SJames Wright static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) { 207c8564c30SJames Wright Mat mat_inner = NULL; 208c8564c30SJames Wright MatCeedContext ctx; 209c8564c30SJames Wright 210c8564c30SJames Wright PetscFunctionBeginUser; 211c8564c30SJames Wright PetscCall(MatShellGetContext(mat_ceed, &ctx)); 212c8564c30SJames Wright 213c8564c30SJames Wright // Assemble inner mat if needed 214c8564c30SJames Wright PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner)); 215c8564c30SJames Wright 216c8564c30SJames Wright // Get block diagonal 217c8564c30SJames Wright PetscCall(MatGetDiagonalBlock(mat_inner, mat_block)); 218c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 219c8564c30SJames Wright } 220c8564c30SJames Wright 221c8564c30SJames Wright /** 222c8564c30SJames Wright @brief Invert `MATCEED` diagonal block for Jacobi. 223c8564c30SJames Wright 224c8564c30SJames Wright Collective across MPI processes. 225c8564c30SJames Wright 226c8564c30SJames Wright @param[in] mat_ceed `MATCEED` to invert 227c8564c30SJames Wright @param[out] values The block inverses in column major order 228c8564c30SJames Wright 229c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 230c8564c30SJames Wright **/ 231c8564c30SJames Wright static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) { 232c8564c30SJames Wright Mat mat_inner = NULL; 233c8564c30SJames Wright MatCeedContext ctx; 234c8564c30SJames Wright 235c8564c30SJames Wright PetscFunctionBeginUser; 236c8564c30SJames Wright PetscCall(MatShellGetContext(mat_ceed, &ctx)); 237c8564c30SJames Wright 238c8564c30SJames Wright // Assemble inner mat if needed 239c8564c30SJames Wright PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner)); 240c8564c30SJames Wright 241c8564c30SJames Wright // Invert PB diagonal 242c8564c30SJames Wright PetscCall(MatInvertBlockDiagonal(mat_inner, values)); 243c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 244c8564c30SJames Wright } 245c8564c30SJames Wright 246c8564c30SJames Wright /** 247c8564c30SJames Wright @brief Invert `MATCEED` variable diagonal block for Jacobi. 248c8564c30SJames Wright 249c8564c30SJames Wright Collective across MPI processes. 250c8564c30SJames Wright 251c8564c30SJames Wright @param[in] mat_ceed `MATCEED` to invert 252c8564c30SJames Wright @param[in] num_blocks The number of blocks on the process 253c8564c30SJames Wright @param[in] block_sizes The size of each block on the process 254c8564c30SJames Wright @param[out] values The block inverses in column major order 255c8564c30SJames Wright 256c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 257c8564c30SJames Wright **/ 258c8564c30SJames Wright static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) { 259c8564c30SJames Wright Mat mat_inner = NULL; 260c8564c30SJames Wright MatCeedContext ctx; 261c8564c30SJames Wright 262c8564c30SJames Wright PetscFunctionBeginUser; 263c8564c30SJames Wright PetscCall(MatShellGetContext(mat_ceed, &ctx)); 264c8564c30SJames Wright 265c8564c30SJames Wright // Assemble inner mat if needed 266c8564c30SJames Wright PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner)); 267c8564c30SJames Wright 268c8564c30SJames Wright // Invert PB diagonal 269c8564c30SJames Wright PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values)); 270c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 271c8564c30SJames Wright } 272c8564c30SJames Wright 273*3933d9a0SJames Wright /** 274*3933d9a0SJames Wright @brief View `MATCEED`. 275*3933d9a0SJames Wright 276*3933d9a0SJames Wright Collective across MPI processes. 277*3933d9a0SJames Wright 278*3933d9a0SJames Wright @param[in] mat_ceed `MATCEED` to view 279*3933d9a0SJames Wright @param[in] viewer The visualization context 280*3933d9a0SJames Wright 281*3933d9a0SJames Wright @return An error code: 0 - success, otherwise - failure 282*3933d9a0SJames Wright **/ 283*3933d9a0SJames Wright static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) { 284*3933d9a0SJames Wright PetscBool is_ascii; 285*3933d9a0SJames Wright PetscViewerFormat format; 286*3933d9a0SJames Wright PetscMPIInt size; 287*3933d9a0SJames Wright MatCeedContext ctx; 288*3933d9a0SJames Wright 289*3933d9a0SJames Wright PetscFunctionBeginUser; 290*3933d9a0SJames Wright PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 291*3933d9a0SJames Wright PetscCall(MatShellGetContext(mat_ceed, &ctx)); 292*3933d9a0SJames Wright if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer)); 293*3933d9a0SJames Wright 294*3933d9a0SJames Wright PetscCall(PetscViewerGetFormat(viewer, &format)); 295*3933d9a0SJames Wright PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size)); 296*3933d9a0SJames Wright if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS); 297*3933d9a0SJames Wright 298*3933d9a0SJames Wright PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii)); 299*3933d9a0SJames Wright { 300*3933d9a0SJames Wright FILE *file; 301*3933d9a0SJames Wright 302*3933d9a0SJames Wright PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n Internal MatType:%s\n", ctx->internal_mat_type)); 303*3933d9a0SJames Wright PetscCall(PetscViewerASCIIGetPointer(viewer, &file)); 304*3933d9a0SJames Wright PetscCall(PetscViewerASCIIPrintf(viewer, " libCEED Operator:\n")); 305*3933d9a0SJames Wright PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file)); 306*3933d9a0SJames Wright if (ctx->op_mult_transpose) { 307*3933d9a0SJames Wright PetscCall(PetscViewerASCIIPrintf(viewer, " libCEED Transpose Operator:\n")); 308*3933d9a0SJames Wright PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file)); 309*3933d9a0SJames Wright } 310*3933d9a0SJames Wright } 311*3933d9a0SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 312*3933d9a0SJames Wright } 313*3933d9a0SJames Wright 314c8564c30SJames Wright // ----------------------------------------------------------------------------- 315c8564c30SJames Wright // MatCeed 316c8564c30SJames Wright // ----------------------------------------------------------------------------- 317c8564c30SJames Wright 318c8564c30SJames Wright /** 319c8564c30SJames Wright @brief Create PETSc `Mat` from libCEED operators. 320c8564c30SJames Wright 321c8564c30SJames Wright Collective across MPI processes. 322c8564c30SJames Wright 323c8564c30SJames Wright @param[in] dm_x Input `DM` 324c8564c30SJames Wright @param[in] dm_y Output `DM` 325c8564c30SJames Wright @param[in] op_mult `CeedOperator` for forward evaluation 326c8564c30SJames Wright @param[in] op_mult_transpose `CeedOperator` for transpose evaluation 327c8564c30SJames Wright @param[out] mat New MatCeed 328c8564c30SJames Wright 329c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 330c8564c30SJames Wright **/ 331c8564c30SJames Wright PetscErrorCode MatCeedCreate(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) { 332c8564c30SJames Wright PetscInt X_l_size, X_g_size, Y_l_size, Y_g_size; 333c8564c30SJames Wright VecType vec_type; 334c8564c30SJames Wright MatCeedContext ctx; 335c8564c30SJames Wright 336c8564c30SJames Wright PetscFunctionBeginUser; 337c8564c30SJames Wright PetscCall(MatCeedRegisterLogEvents()); 338c8564c30SJames Wright 339c8564c30SJames Wright // Collect context data 340c8564c30SJames Wright PetscCall(DMGetVecType(dm_x, &vec_type)); 341c8564c30SJames Wright { 342c8564c30SJames Wright Vec X; 343c8564c30SJames Wright 344c8564c30SJames Wright PetscCall(DMGetGlobalVector(dm_x, &X)); 345c8564c30SJames Wright PetscCall(VecGetSize(X, &X_g_size)); 346c8564c30SJames Wright PetscCall(VecGetLocalSize(X, &X_l_size)); 347c8564c30SJames Wright PetscCall(DMRestoreGlobalVector(dm_x, &X)); 348c8564c30SJames Wright } 349c8564c30SJames Wright if (dm_y) { 350c8564c30SJames Wright Vec Y; 351c8564c30SJames Wright 352c8564c30SJames Wright PetscCall(DMGetGlobalVector(dm_y, &Y)); 353c8564c30SJames Wright PetscCall(VecGetSize(Y, &Y_g_size)); 354c8564c30SJames Wright PetscCall(VecGetLocalSize(Y, &Y_l_size)); 355c8564c30SJames Wright PetscCall(DMRestoreGlobalVector(dm_y, &Y)); 356c8564c30SJames Wright } else { 357c8564c30SJames Wright dm_y = dm_x; 358c8564c30SJames Wright Y_g_size = X_g_size; 359c8564c30SJames Wright Y_l_size = X_l_size; 360c8564c30SJames Wright } 361c8564c30SJames Wright // Create context 362c8564c30SJames Wright { 363c8564c30SJames Wright Vec X_loc, Y_loc_transpose = NULL; 364c8564c30SJames Wright 365c8564c30SJames Wright PetscCall(DMCreateLocalVector(dm_x, &X_loc)); 366c8564c30SJames Wright PetscCall(VecZeroEntries(X_loc)); 367c8564c30SJames Wright if (op_mult_transpose) { 368c8564c30SJames Wright PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose)); 369c8564c30SJames Wright PetscCall(VecZeroEntries(Y_loc_transpose)); 370c8564c30SJames Wright } 371c8564c30SJames Wright PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE, &ctx)); 372c8564c30SJames Wright PetscCall(VecDestroy(&X_loc)); 373c8564c30SJames Wright PetscCall(VecDestroy(&Y_loc_transpose)); 374c8564c30SJames Wright } 375c8564c30SJames Wright 376c8564c30SJames Wright // Create mat 377c8564c30SJames Wright PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat)); 378c8564c30SJames Wright PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED)); 379c8564c30SJames Wright // -- Set block and variable block sizes 380c8564c30SJames Wright if (dm_x == dm_y) { 381c8564c30SJames Wright MatType dm_mat_type, dm_mat_type_copy; 382c8564c30SJames Wright Mat temp_mat; 383c8564c30SJames Wright 384c8564c30SJames Wright PetscCall(DMGetMatType(dm_x, &dm_mat_type)); 385c8564c30SJames Wright PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy)); 386c8564c30SJames Wright PetscCall(DMSetMatType(dm_x, MATAIJ)); 387c8564c30SJames Wright PetscCall(DMCreateMatrix(dm_x, &temp_mat)); 388c8564c30SJames Wright PetscCall(DMSetMatType(dm_x, dm_mat_type_copy)); 389c8564c30SJames Wright PetscCall(PetscFree(dm_mat_type_copy)); 390c8564c30SJames Wright 391c8564c30SJames Wright { 392c8564c30SJames Wright PetscInt block_size, num_blocks, max_vblock_size = PETSC_INT_MAX; 393c8564c30SJames Wright const PetscInt *vblock_sizes; 394c8564c30SJames Wright 395c8564c30SJames Wright // -- Get block sizes 396c8564c30SJames Wright PetscCall(MatGetBlockSize(temp_mat, &block_size)); 397c8564c30SJames Wright PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes)); 398c8564c30SJames Wright { 399c8564c30SJames Wright PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX}; 400c8564c30SJames Wright 401c8564c30SJames Wright for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]); 402c8564c30SJames Wright PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max)); 403c8564c30SJames Wright max_vblock_size = global_min_max[1]; 404c8564c30SJames Wright } 405c8564c30SJames Wright 406c8564c30SJames Wright // -- Copy block sizes 407c8564c30SJames Wright if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size)); 408c8564c30SJames Wright if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes)); 409c8564c30SJames Wright 410c8564c30SJames Wright // -- Check libCEED compatibility 411c8564c30SJames Wright { 412c8564c30SJames Wright bool is_composite; 413c8564c30SJames Wright 414c8564c30SJames Wright ctx->is_ceed_pbd_valid = PETSC_TRUE; 415c8564c30SJames Wright ctx->is_ceed_vpbd_valid = PETSC_TRUE; 41654831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite)); 417c8564c30SJames Wright if (is_composite) { 418c8564c30SJames Wright CeedInt num_sub_operators; 419c8564c30SJames Wright CeedOperator *sub_operators; 420c8564c30SJames Wright 42154831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators)); 42254831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators)); 423c8564c30SJames Wright for (CeedInt i = 0; i < num_sub_operators; i++) { 424c8564c30SJames Wright CeedInt num_bases, num_comp; 425c8564c30SJames Wright CeedBasis *active_bases; 426c8564c30SJames Wright CeedOperatorAssemblyData assembly_data; 427c8564c30SJames Wright 42854831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data)); 42954831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL)); 43054831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp)); 431c8564c30SJames Wright if (num_bases > 1) { 432c8564c30SJames Wright ctx->is_ceed_pbd_valid = PETSC_FALSE; 433c8564c30SJames Wright ctx->is_ceed_vpbd_valid = PETSC_FALSE; 434c8564c30SJames Wright } 435c8564c30SJames Wright if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE; 436c8564c30SJames Wright if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE; 437c8564c30SJames Wright } 438c8564c30SJames Wright } else { 439c8564c30SJames Wright // LCOV_EXCL_START 440c8564c30SJames Wright CeedInt num_bases, num_comp; 441c8564c30SJames Wright CeedBasis *active_bases; 442c8564c30SJames Wright CeedOperatorAssemblyData assembly_data; 443c8564c30SJames Wright 44454831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data)); 44554831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL)); 44654831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp)); 447c8564c30SJames Wright if (num_bases > 1) { 448c8564c30SJames Wright ctx->is_ceed_pbd_valid = PETSC_FALSE; 449c8564c30SJames Wright ctx->is_ceed_vpbd_valid = PETSC_FALSE; 450c8564c30SJames Wright } 451c8564c30SJames Wright if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE; 452c8564c30SJames Wright if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE; 453c8564c30SJames Wright // LCOV_EXCL_STOP 454c8564c30SJames Wright } 455c8564c30SJames Wright { 456c8564c30SJames Wright PetscInt local_is_valid[2], global_is_valid[2]; 457c8564c30SJames Wright 458c8564c30SJames Wright local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid; 459c8564c30SJames Wright PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid)); 460c8564c30SJames Wright ctx->is_ceed_pbd_valid = global_is_valid[0]; 461c8564c30SJames Wright local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid; 462c8564c30SJames Wright PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid)); 463c8564c30SJames Wright ctx->is_ceed_vpbd_valid = global_is_valid[0]; 464c8564c30SJames Wright } 465c8564c30SJames Wright } 466c8564c30SJames Wright } 467c8564c30SJames Wright PetscCall(MatDestroy(&temp_mat)); 468c8564c30SJames Wright } 469c8564c30SJames Wright // -- Set internal mat type 470c8564c30SJames Wright { 471c8564c30SJames Wright VecType vec_type; 472c8564c30SJames Wright MatType internal_mat_type = MATAIJ; 473c8564c30SJames Wright 474c8564c30SJames Wright PetscCall(VecGetType(ctx->X_loc, &vec_type)); 475c8564c30SJames Wright if (strstr(vec_type, VECCUDA)) internal_mat_type = MATAIJCUSPARSE; 476c8564c30SJames Wright else if (strstr(vec_type, VECKOKKOS)) internal_mat_type = MATAIJKOKKOS; 477c8564c30SJames Wright else internal_mat_type = MATAIJ; 478c8564c30SJames Wright PetscCall(PetscStrallocpy(internal_mat_type, &ctx->internal_mat_type)); 479c8564c30SJames Wright } 480c8564c30SJames Wright // -- Set mat operations 481c8564c30SJames Wright PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy)); 482*3933d9a0SJames Wright PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed)); 483c8564c30SJames Wright PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed)); 484c8564c30SJames Wright if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed)); 485c8564c30SJames Wright PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed)); 486c8564c30SJames Wright PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed)); 487c8564c30SJames Wright PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed)); 488c8564c30SJames Wright PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed)); 489c8564c30SJames Wright PetscCall(MatShellSetVecType(*mat, vec_type)); 490c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 491c8564c30SJames Wright } 492c8564c30SJames Wright 493c8564c30SJames Wright /** 494c8564c30SJames Wright @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`. 495c8564c30SJames Wright 496c8564c30SJames Wright Collective across MPI processes. 497c8564c30SJames Wright 498c8564c30SJames Wright @param[in] mat_ceed `MATCEED` to copy from 499c8564c30SJames Wright @param[out] mat_other `MatShell` or `MATCEED` to copy into 500c8564c30SJames Wright 501c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 502c8564c30SJames Wright **/ 503c8564c30SJames Wright PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) { 504c8564c30SJames Wright PetscFunctionBeginUser; 505c8564c30SJames Wright PetscCall(MatCeedRegisterLogEvents()); 506c8564c30SJames Wright 507c8564c30SJames Wright // Check type compatibility 508c8564c30SJames Wright { 509c8564c30SJames Wright MatType mat_type_ceed, mat_type_other; 510c8564c30SJames Wright 511c8564c30SJames Wright PetscCall(MatGetType(mat_ceed, &mat_type_ceed)); 512c8564c30SJames Wright PetscCheck(!strcmp(mat_type_ceed, MATCEED), PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED); 513c8564c30SJames Wright PetscCall(MatGetType(mat_ceed, &mat_type_other)); 514c8564c30SJames Wright PetscCheck(!strcmp(mat_type_other, MATCEED) || !strcmp(mat_type_other, MATSHELL), PETSC_COMM_SELF, PETSC_ERR_LIB, 515c8564c30SJames Wright "mat_other must have type " MATCEED " or " MATSHELL); 516c8564c30SJames Wright } 517c8564c30SJames Wright 518c8564c30SJames Wright // Check dimension compatibility 519c8564c30SJames Wright { 520c8564c30SJames Wright PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size; 521c8564c30SJames Wright 522c8564c30SJames Wright PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size)); 523c8564c30SJames Wright PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size)); 524c8564c30SJames Wright PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size)); 525c8564c30SJames Wright PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size)); 526c8564c30SJames Wright PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) && 527c8564c30SJames Wright (X_l_ceed_size == X_l_other_size), 528c8564c30SJames Wright PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, 529c8564c30SJames Wright "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT 530c8564c30SJames Wright "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT 531c8564c30SJames Wright ", %" PetscInt_FMT ")", 532c8564c30SJames Wright Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size); 533c8564c30SJames Wright } 534c8564c30SJames Wright 535c8564c30SJames Wright // Convert 536c8564c30SJames Wright { 537c8564c30SJames Wright VecType vec_type; 538c8564c30SJames Wright MatCeedContext ctx; 539c8564c30SJames Wright 540c8564c30SJames Wright PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED)); 541c8564c30SJames Wright PetscCall(MatShellGetContext(mat_ceed, &ctx)); 542c8564c30SJames Wright PetscCall(MatCeedContextReference(ctx)); 543c8564c30SJames Wright PetscCall(MatShellSetContext(mat_other, ctx)); 544c8564c30SJames Wright PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy)); 545*3933d9a0SJames Wright PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed)); 546c8564c30SJames Wright PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed)); 547c8564c30SJames Wright if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed)); 548c8564c30SJames Wright PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed)); 549c8564c30SJames Wright PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed)); 550c8564c30SJames Wright PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed)); 551c8564c30SJames Wright PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed)); 552c8564c30SJames Wright { 553c8564c30SJames Wright PetscInt block_size; 554c8564c30SJames Wright 555c8564c30SJames Wright PetscCall(MatGetBlockSize(mat_ceed, &block_size)); 556c8564c30SJames Wright if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size)); 557c8564c30SJames Wright } 558c8564c30SJames Wright { 559c8564c30SJames Wright PetscInt num_blocks; 560c8564c30SJames Wright const PetscInt *block_sizes; 561c8564c30SJames Wright 562c8564c30SJames Wright PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes)); 563c8564c30SJames Wright if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes)); 564c8564c30SJames Wright } 565c8564c30SJames Wright PetscCall(DMGetVecType(ctx->dm_x, &vec_type)); 566c8564c30SJames Wright PetscCall(MatShellSetVecType(mat_other, vec_type)); 567c8564c30SJames Wright } 568c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 569c8564c30SJames Wright } 570c8564c30SJames Wright 571c8564c30SJames Wright /** 572c8564c30SJames Wright @brief Assemble a `MATCEED` into a `MATAIJ` or similar. 573c8564c30SJames Wright The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`. 574c8564c30SJames Wright The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail. 575c8564c30SJames Wright 576c8564c30SJames Wright Collective across MPI processes. 577c8564c30SJames Wright 578c8564c30SJames Wright @param[in] mat_ceed `MATCEED` to assemble 579c8564c30SJames Wright @param[in,out] mat_coo `MATAIJ` or similar to assemble into 580c8564c30SJames Wright 581c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 582c8564c30SJames Wright **/ 583c8564c30SJames Wright PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) { 584c8564c30SJames Wright MatCeedContext ctx; 585c8564c30SJames Wright 586c8564c30SJames Wright PetscFunctionBeginUser; 587c8564c30SJames Wright PetscCall(MatShellGetContext(mat_ceed, &ctx)); 588c8564c30SJames Wright 589c8564c30SJames Wright // Check if COO pattern set 590c8564c30SJames Wright { 591c8564c30SJames Wright PetscInt index = -1; 592c8564c30SJames Wright 593c8564c30SJames Wright for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) { 594c8564c30SJames Wright if (ctx->mats_assembled_full[i] == mat_coo) index = i; 595c8564c30SJames Wright } 596c8564c30SJames Wright if (index == -1) { 597c8564c30SJames Wright PetscInt *rows_petsc = NULL, *cols_petsc = NULL; 598c8564c30SJames Wright CeedInt *rows_ceed, *cols_ceed; 599c8564c30SJames Wright PetscCount num_entries; 600c8564c30SJames Wright PetscLogStage stage_amg_setup; 601c8564c30SJames Wright 602c8564c30SJames Wright // -- Assemble sparsity pattern if mat hasn't been assembled before 603c8564c30SJames Wright PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup)); 604c8564c30SJames Wright if (stage_amg_setup == -1) { 605c8564c30SJames Wright PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup)); 606c8564c30SJames Wright } 607c8564c30SJames Wright PetscCall(PetscLogStagePush(stage_amg_setup)); 60854831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed)); 609d0593705SJames Wright PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc)); 610d0593705SJames Wright PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc)); 611c8564c30SJames Wright PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc)); 612c8564c30SJames Wright free(rows_petsc); 613c8564c30SJames Wright free(cols_petsc); 61454831c5fSJames Wright if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full)); 615c8564c30SJames Wright PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full)); 616c8564c30SJames Wright ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo; 617c8564c30SJames Wright PetscCall(PetscLogStagePop()); 618c8564c30SJames Wright } 619c8564c30SJames Wright } 620c8564c30SJames Wright 621c8564c30SJames Wright // Assemble mat_ceed 622c8564c30SJames Wright PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY)); 623c8564c30SJames Wright { 624c8564c30SJames Wright const CeedScalar *values; 625c8564c30SJames Wright MatType mat_type; 626c8564c30SJames Wright CeedMemType mem_type = CEED_MEM_HOST; 627c8564c30SJames Wright PetscBool is_spd, is_spd_known; 628c8564c30SJames Wright 629c8564c30SJames Wright PetscCall(MatGetType(mat_coo, &mat_type)); 630c8564c30SJames Wright if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE; 631c8564c30SJames Wright else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE; 632c8564c30SJames Wright else mem_type = CEED_MEM_HOST; 633c8564c30SJames Wright 63454831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full)); 63554831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values)); 636c8564c30SJames Wright PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES)); 637c8564c30SJames Wright PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd)); 638c8564c30SJames Wright if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd)); 63954831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values)); 640c8564c30SJames Wright } 641c8564c30SJames Wright PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY)); 642c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 643c8564c30SJames Wright } 644c8564c30SJames Wright 645c8564c30SJames Wright /** 646c8564c30SJames Wright @brief Set user context for a `MATCEED`. 647c8564c30SJames Wright 648c8564c30SJames Wright Collective across MPI processes. 649c8564c30SJames Wright 650c8564c30SJames Wright @param[in,out] mat `MATCEED` 651c8564c30SJames Wright @param[in] f The context destroy function, or NULL 652c8564c30SJames Wright @param[in] ctx User context, or NULL to unset 653c8564c30SJames Wright 654c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 655c8564c30SJames Wright **/ 656c8564c30SJames Wright PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) { 657c8564c30SJames Wright PetscContainer user_ctx = NULL; 658c8564c30SJames Wright 659c8564c30SJames Wright PetscFunctionBeginUser; 660c8564c30SJames Wright if (ctx) { 661c8564c30SJames Wright PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx)); 662c8564c30SJames Wright PetscCall(PetscContainerSetPointer(user_ctx, ctx)); 663c8564c30SJames Wright PetscCall(PetscContainerSetUserDestroy(user_ctx, f)); 664c8564c30SJames Wright } 665c8564c30SJames Wright PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx)); 666c8564c30SJames Wright PetscCall(PetscContainerDestroy(&user_ctx)); 667c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 668c8564c30SJames Wright } 669c8564c30SJames Wright 670c8564c30SJames Wright /** 671c8564c30SJames Wright @brief Retrieve the user context for a `MATCEED`. 672c8564c30SJames Wright 673c8564c30SJames Wright Collective across MPI processes. 674c8564c30SJames Wright 675c8564c30SJames Wright @param[in,out] mat `MATCEED` 676c8564c30SJames Wright @param[in] ctx User context 677c8564c30SJames Wright 678c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 679c8564c30SJames Wright **/ 680c8564c30SJames Wright PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) { 681c8564c30SJames Wright PetscContainer user_ctx; 682c8564c30SJames Wright 683c8564c30SJames Wright PetscFunctionBeginUser; 684c8564c30SJames Wright PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx)); 685c8564c30SJames Wright if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx)); 686c8564c30SJames Wright else *(void **)ctx = NULL; 687c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 688c8564c30SJames Wright } 689c8564c30SJames Wright 690c8564c30SJames Wright /** 691c8564c30SJames Wright @brief Sets the inner matrix type as a string from the `MATCEED`. 692c8564c30SJames Wright 693c8564c30SJames Wright Collective across MPI processes. 694c8564c30SJames Wright 695c8564c30SJames Wright @param[in,out] mat `MATCEED` 696c8564c30SJames Wright @param[in] type Inner `MatType` to set 697c8564c30SJames Wright 698c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 699c8564c30SJames Wright **/ 700c8564c30SJames Wright PetscErrorCode MatCeedSetInnerMatType(Mat mat, MatType type) { 701c8564c30SJames Wright MatCeedContext ctx; 702c8564c30SJames Wright 703c8564c30SJames Wright PetscFunctionBeginUser; 704c8564c30SJames Wright PetscCall(MatShellGetContext(mat, &ctx)); 705c8564c30SJames Wright // Check if same 706c8564c30SJames Wright { 707c8564c30SJames Wright size_t len_old, len_new; 708c8564c30SJames Wright PetscBool is_same = PETSC_FALSE; 709c8564c30SJames Wright 710c8564c30SJames Wright PetscCall(PetscStrlen(ctx->internal_mat_type, &len_old)); 711c8564c30SJames Wright PetscCall(PetscStrlen(type, &len_new)); 712c8564c30SJames Wright if (len_old == len_new) PetscCall(PetscStrncmp(ctx->internal_mat_type, type, len_old, &is_same)); 713c8564c30SJames Wright if (is_same) PetscFunctionReturn(PETSC_SUCCESS); 714c8564c30SJames Wright } 715c8564c30SJames Wright // Clean up old mats in different format 716c8564c30SJames Wright // LCOV_EXCL_START 717c8564c30SJames Wright if (ctx->mat_assembled_full_internal) { 718c8564c30SJames Wright for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) { 719c8564c30SJames Wright if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) { 720c8564c30SJames Wright for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) { 721c8564c30SJames Wright ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j]; 722c8564c30SJames Wright } 723c8564c30SJames Wright ctx->num_mats_assembled_full--; 724c8564c30SJames Wright // Note: we'll realloc this array again, so no need to shrink the allocation 725c8564c30SJames Wright PetscCall(MatDestroy(&ctx->mat_assembled_full_internal)); 726c8564c30SJames Wright } 727c8564c30SJames Wright } 728c8564c30SJames Wright } 729c8564c30SJames Wright if (ctx->mat_assembled_pbd_internal) { 730c8564c30SJames Wright for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) { 731c8564c30SJames Wright if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) { 732c8564c30SJames Wright for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) { 733c8564c30SJames Wright ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j]; 734c8564c30SJames Wright } 735c8564c30SJames Wright // Note: we'll realloc this array again, so no need to shrink the allocation 736c8564c30SJames Wright ctx->num_mats_assembled_pbd--; 737c8564c30SJames Wright PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal)); 738c8564c30SJames Wright } 739c8564c30SJames Wright } 740c8564c30SJames Wright } 741c8564c30SJames Wright PetscCall(PetscFree(ctx->internal_mat_type)); 742c8564c30SJames Wright PetscCall(PetscStrallocpy(type, &ctx->internal_mat_type)); 743c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 744c8564c30SJames Wright // LCOV_EXCL_STOP 745c8564c30SJames Wright } 746c8564c30SJames Wright 747c8564c30SJames Wright /** 748c8564c30SJames Wright @brief Gets the inner matrix type as a string from the `MATCEED`. 749c8564c30SJames Wright 750c8564c30SJames Wright Collective across MPI processes. 751c8564c30SJames Wright 752c8564c30SJames Wright @param[in,out] mat `MATCEED` 753c8564c30SJames Wright @param[in] type Inner `MatType` 754c8564c30SJames Wright 755c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 756c8564c30SJames Wright **/ 757c8564c30SJames Wright PetscErrorCode MatCeedGetInnerMatType(Mat mat, MatType *type) { 758c8564c30SJames Wright MatCeedContext ctx; 759c8564c30SJames Wright 760c8564c30SJames Wright PetscFunctionBeginUser; 761c8564c30SJames Wright PetscCall(MatShellGetContext(mat, &ctx)); 762c8564c30SJames Wright *type = ctx->internal_mat_type; 763c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 764c8564c30SJames Wright } 765c8564c30SJames Wright 766c8564c30SJames Wright /** 767c8564c30SJames Wright @brief Set a user defined matrix operation for a `MATCEED` matrix. 768c8564c30SJames Wright 769c8564c30SJames Wright Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by 770c8564c30SJames Wright `MatCeedSetContext()`. 771c8564c30SJames Wright 772c8564c30SJames Wright Collective across MPI processes. 773c8564c30SJames Wright 774c8564c30SJames Wright @param[in,out] mat `MATCEED` 775c8564c30SJames Wright @param[in] op Name of the `MatOperation` 776c8564c30SJames Wright @param[in] g Function that provides the operation 777c8564c30SJames Wright 778c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 779c8564c30SJames Wright **/ 780c8564c30SJames Wright PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) { 781c8564c30SJames Wright PetscFunctionBeginUser; 782c8564c30SJames Wright PetscCall(MatShellSetOperation(mat, op, g)); 783c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 784c8564c30SJames Wright } 785c8564c30SJames Wright 786c8564c30SJames Wright /** 787c8564c30SJames Wright @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 788c8564c30SJames Wright 789c8564c30SJames Wright Not collective across MPI processes. 790c8564c30SJames Wright 791c8564c30SJames Wright @param[in,out] mat `MATCEED` 792c8564c30SJames Wright @param[in] X_loc Input PETSc local vector, or NULL 793c8564c30SJames Wright @param[in] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 794c8564c30SJames Wright 795c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 796c8564c30SJames Wright **/ 797c8564c30SJames Wright PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) { 798c8564c30SJames Wright MatCeedContext ctx; 799c8564c30SJames Wright 800c8564c30SJames Wright PetscFunctionBeginUser; 801c8564c30SJames Wright PetscCall(MatShellGetContext(mat, &ctx)); 802c8564c30SJames Wright if (X_loc) { 803c8564c30SJames Wright PetscInt len_old, len_new; 804c8564c30SJames Wright 805c8564c30SJames Wright PetscCall(VecGetSize(ctx->X_loc, &len_old)); 806c8564c30SJames Wright PetscCall(VecGetSize(X_loc, &len_new)); 807c8564c30SJames Wright PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT, 808c8564c30SJames Wright len_new, len_old); 809c8564c30SJames Wright PetscCall(VecDestroy(&ctx->X_loc)); 810c8564c30SJames Wright ctx->X_loc = X_loc; 811c8564c30SJames Wright PetscCall(PetscObjectReference((PetscObject)X_loc)); 812c8564c30SJames Wright } 813c8564c30SJames Wright if (Y_loc_transpose) { 814c8564c30SJames Wright PetscInt len_old, len_new; 815c8564c30SJames Wright 816c8564c30SJames Wright PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old)); 817c8564c30SJames Wright PetscCall(VecGetSize(Y_loc_transpose, &len_new)); 818c8564c30SJames Wright PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, 819c8564c30SJames Wright "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old); 820c8564c30SJames Wright PetscCall(VecDestroy(&ctx->Y_loc_transpose)); 821c8564c30SJames Wright ctx->Y_loc_transpose = Y_loc_transpose; 822c8564c30SJames Wright PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose)); 823c8564c30SJames Wright } 824c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 825c8564c30SJames Wright } 826c8564c30SJames Wright 827c8564c30SJames Wright /** 828c8564c30SJames Wright @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 829c8564c30SJames Wright 830c8564c30SJames Wright Not collective across MPI processes. 831c8564c30SJames Wright 832c8564c30SJames Wright @param[in,out] mat `MATCEED` 833c8564c30SJames Wright @param[out] X_loc Input PETSc local vector, or NULL 834c8564c30SJames Wright @param[out] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 835c8564c30SJames Wright 836c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 837c8564c30SJames Wright **/ 838c8564c30SJames Wright PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) { 839c8564c30SJames Wright MatCeedContext ctx; 840c8564c30SJames Wright 841c8564c30SJames Wright PetscFunctionBeginUser; 842c8564c30SJames Wright PetscCall(MatShellGetContext(mat, &ctx)); 843c8564c30SJames Wright if (X_loc) { 844c8564c30SJames Wright *X_loc = ctx->X_loc; 845c8564c30SJames Wright PetscCall(PetscObjectReference((PetscObject)*X_loc)); 846c8564c30SJames Wright } 847c8564c30SJames Wright if (Y_loc_transpose) { 848c8564c30SJames Wright *Y_loc_transpose = ctx->Y_loc_transpose; 849c8564c30SJames Wright PetscCall(PetscObjectReference((PetscObject)*Y_loc_transpose)); 850c8564c30SJames Wright } 851c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 852c8564c30SJames Wright } 853c8564c30SJames Wright 854c8564c30SJames Wright /** 855c8564c30SJames Wright @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 856c8564c30SJames Wright 857c8564c30SJames Wright Not collective across MPI processes. 858c8564c30SJames Wright 859c8564c30SJames Wright @param[in,out] mat MatCeed 860c8564c30SJames Wright @param[out] X_loc Input PETSc local vector, or NULL 861c8564c30SJames Wright @param[out] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 862c8564c30SJames Wright 863c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 864c8564c30SJames Wright **/ 865c8564c30SJames Wright PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) { 866c8564c30SJames Wright PetscFunctionBeginUser; 867c8564c30SJames Wright if (X_loc) PetscCall(VecDestroy(X_loc)); 868c8564c30SJames Wright if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose)); 869c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 870c8564c30SJames Wright } 871c8564c30SJames Wright 872c8564c30SJames Wright /** 873c8564c30SJames Wright @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 874c8564c30SJames Wright 875c8564c30SJames Wright Not collective across MPI processes. 876c8564c30SJames Wright 877c8564c30SJames Wright @param[in,out] mat MatCeed 878c8564c30SJames Wright @param[out] op_mult libCEED `CeedOperator` for `MatMult()`, or NULL 879c8564c30SJames Wright @param[out] op_mult_transpose libCEED `CeedOperator` for `MatMultTranspose()`, or NULL 880c8564c30SJames Wright 881c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 882c8564c30SJames Wright **/ 883c8564c30SJames Wright PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) { 884c8564c30SJames Wright MatCeedContext ctx; 885c8564c30SJames Wright 886c8564c30SJames Wright PetscFunctionBeginUser; 887c8564c30SJames Wright PetscCall(MatShellGetContext(mat, &ctx)); 888c8564c30SJames Wright if (op_mult) { 889c8564c30SJames Wright *op_mult = NULL; 89054831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult)); 891c8564c30SJames Wright } 892c8564c30SJames Wright if (op_mult_transpose) { 893c8564c30SJames Wright *op_mult_transpose = NULL; 89454831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose)); 895c8564c30SJames Wright } 896c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 897c8564c30SJames Wright } 898c8564c30SJames Wright 899c8564c30SJames Wright /** 900c8564c30SJames Wright @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 901c8564c30SJames Wright 902c8564c30SJames Wright Not collective across MPI processes. 903c8564c30SJames Wright 904c8564c30SJames Wright @param[in,out] mat MatCeed 905c8564c30SJames Wright @param[out] op_mult libCEED `CeedOperator` for `MatMult()`, or NULL 906c8564c30SJames Wright @param[out] op_mult_transpose libCEED `CeedOperator` for `MatMultTranspose()`, or NULL 907c8564c30SJames Wright 908c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 909c8564c30SJames Wright **/ 910c8564c30SJames Wright PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) { 911c8564c30SJames Wright MatCeedContext ctx; 912c8564c30SJames Wright 913c8564c30SJames Wright PetscFunctionBeginUser; 914c8564c30SJames Wright PetscCall(MatShellGetContext(mat, &ctx)); 91554831c5fSJames Wright if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult)); 91654831c5fSJames Wright if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose)); 917c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 918c8564c30SJames Wright } 919c8564c30SJames Wright 920c8564c30SJames Wright /** 921c8564c30SJames Wright @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators. 922c8564c30SJames Wright 923c8564c30SJames Wright Not collective across MPI processes. 924c8564c30SJames Wright 925c8564c30SJames Wright @param[in,out] mat MatCeed 926c8564c30SJames Wright @param[out] log_event_mult `PetscLogEvent` for forward evaluation, or NULL 927c8564c30SJames Wright @param[out] log_event_mult_transpose `PetscLogEvent` for transpose evaluation, or NULL 928c8564c30SJames Wright 929c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 930c8564c30SJames Wright **/ 931c8564c30SJames Wright PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) { 932c8564c30SJames Wright MatCeedContext ctx; 933c8564c30SJames Wright 934c8564c30SJames Wright PetscFunctionBeginUser; 935c8564c30SJames Wright PetscCall(MatShellGetContext(mat, &ctx)); 936c8564c30SJames Wright if (log_event_mult) ctx->log_event_mult = log_event_mult; 937c8564c30SJames Wright if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose; 938c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 939c8564c30SJames Wright } 940c8564c30SJames Wright 941c8564c30SJames Wright /** 942c8564c30SJames Wright @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators. 943c8564c30SJames Wright 944c8564c30SJames Wright Not collective across MPI processes. 945c8564c30SJames Wright 946c8564c30SJames Wright @param[in,out] mat MatCeed 947c8564c30SJames Wright @param[out] log_event_mult `PetscLogEvent` for forward evaluation, or NULL 948c8564c30SJames Wright @param[out] log_event_mult_transpose `PetscLogEvent` for transpose evaluation, or NULL 949c8564c30SJames Wright 950c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 951c8564c30SJames Wright **/ 952c8564c30SJames Wright PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) { 953c8564c30SJames Wright MatCeedContext ctx; 954c8564c30SJames Wright 955c8564c30SJames Wright PetscFunctionBeginUser; 956c8564c30SJames Wright PetscCall(MatShellGetContext(mat, &ctx)); 957c8564c30SJames Wright if (log_event_mult) *log_event_mult = ctx->log_event_mult; 958c8564c30SJames Wright if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose; 959c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 960c8564c30SJames Wright } 961c8564c30SJames Wright 962c8564c30SJames Wright // ----------------------------------------------------------------------------- 963c8564c30SJames Wright // Operator context data 964c8564c30SJames Wright // ----------------------------------------------------------------------------- 965c8564c30SJames Wright 966c8564c30SJames Wright /** 967c8564c30SJames Wright @brief Setup context data for operator application. 968c8564c30SJames Wright 969c8564c30SJames Wright Collective across MPI processes. 970c8564c30SJames Wright 971c8564c30SJames Wright @param[in] dm_x Input `DM` 972c8564c30SJames Wright @param[in] dm_y Output `DM` 973c8564c30SJames Wright @param[in] X_loc Input PETSc local vector, or NULL 974c8564c30SJames Wright @param[in] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 975c8564c30SJames Wright @param[in] op_mult `CeedOperator` for forward evaluation 976c8564c30SJames Wright @param[in] op_mult_transpose `CeedOperator` for transpose evaluation 977c8564c30SJames Wright @param[in] log_event_mult `PetscLogEvent` for forward evaluation 978c8564c30SJames Wright @param[in] log_event_mult_transpose `PetscLogEvent` for transpose evaluation 979c8564c30SJames Wright @param[out] ctx Context data for operator evaluation 980c8564c30SJames Wright 981c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 982c8564c30SJames Wright **/ 983c8564c30SJames Wright PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose, 984c8564c30SJames Wright PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, MatCeedContext *ctx) { 985c8564c30SJames Wright CeedSize x_loc_len, y_loc_len; 986c8564c30SJames Wright 987c8564c30SJames Wright PetscFunctionBeginUser; 988c8564c30SJames Wright 989c8564c30SJames Wright // Allocate 990c8564c30SJames Wright PetscCall(PetscNew(ctx)); 991c8564c30SJames Wright (*ctx)->ref_count = 1; 992c8564c30SJames Wright 993c8564c30SJames Wright // Logging 994c8564c30SJames Wright (*ctx)->log_event_mult = log_event_mult; 995c8564c30SJames Wright (*ctx)->log_event_mult_transpose = log_event_mult_transpose; 996c8564c30SJames Wright 997c8564c30SJames Wright // PETSc objects 998c8564c30SJames Wright PetscCall(PetscObjectReference((PetscObject)dm_x)); 999c8564c30SJames Wright (*ctx)->dm_x = dm_x; 1000c8564c30SJames Wright PetscCall(PetscObjectReference((PetscObject)dm_y)); 1001c8564c30SJames Wright (*ctx)->dm_y = dm_y; 1002c8564c30SJames Wright if (X_loc) PetscCall(PetscObjectReference((PetscObject)X_loc)); 1003c8564c30SJames Wright (*ctx)->X_loc = X_loc; 1004c8564c30SJames Wright if (Y_loc_transpose) PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose)); 1005c8564c30SJames Wright (*ctx)->Y_loc_transpose = Y_loc_transpose; 1006c8564c30SJames Wright 1007c8564c30SJames Wright // Memtype 1008c8564c30SJames Wright { 1009c8564c30SJames Wright const PetscScalar *x; 1010c8564c30SJames Wright Vec X; 1011c8564c30SJames Wright 1012c8564c30SJames Wright PetscCall(DMGetLocalVector(dm_x, &X)); 1013c8564c30SJames Wright PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type)); 1014c8564c30SJames Wright PetscCall(VecRestoreArrayReadAndMemType(X, &x)); 1015c8564c30SJames Wright PetscCall(DMRestoreLocalVector(dm_x, &X)); 1016c8564c30SJames Wright } 1017c8564c30SJames Wright 1018c8564c30SJames Wright // libCEED objects 1019c8564c30SJames Wright PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, 1020c8564c30SJames Wright "retrieving Ceed context object failed"); 102154831c5fSJames Wright PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed)); 102254831c5fSJames Wright PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len)); 102354831c5fSJames Wright PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult)); 102454831c5fSJames Wright if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose)); 102554831c5fSJames Wright PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc)); 102654831c5fSJames Wright PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc)); 1027c8564c30SJames Wright 1028c8564c30SJames Wright // Flop counting 1029c8564c30SJames Wright { 1030c8564c30SJames Wright CeedSize ceed_flops_estimate = 0; 1031c8564c30SJames Wright 103254831c5fSJames Wright PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate)); 1033c8564c30SJames Wright (*ctx)->flops_mult = ceed_flops_estimate; 1034c8564c30SJames Wright if (op_mult_transpose) { 103554831c5fSJames Wright PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate)); 1036c8564c30SJames Wright (*ctx)->flops_mult_transpose = ceed_flops_estimate; 1037c8564c30SJames Wright } 1038c8564c30SJames Wright } 1039c8564c30SJames Wright 1040c8564c30SJames Wright // Check sizes 1041c8564c30SJames Wright if (x_loc_len > 0 || y_loc_len > 0) { 1042c8564c30SJames Wright CeedSize ctx_x_loc_len, ctx_y_loc_len; 1043c8564c30SJames Wright PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len; 1044c8564c30SJames Wright Vec dm_X_loc, dm_Y_loc; 1045c8564c30SJames Wright 1046c8564c30SJames Wright // -- Input 1047c8564c30SJames Wright PetscCall(DMGetLocalVector(dm_x, &dm_X_loc)); 1048c8564c30SJames Wright PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len)); 1049c8564c30SJames Wright PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc)); 105054831c5fSJames Wright PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len)); 1051c86b9822SJames Wright if (X_loc) { 1052c86b9822SJames Wright PetscCall(VecGetLocalSize(X_loc, &X_loc_len)); 1053c86b9822SJames Wright PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, 1054c86b9822SJames Wright "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len); 1055c86b9822SJames Wright } 1056c86b9822SJames Wright PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", 1057c86b9822SJames Wright x_loc_len, dm_x_loc_len); 1058c86b9822SJames Wright PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")", 1059c86b9822SJames Wright x_loc_len, ctx_x_loc_len); 1060c8564c30SJames Wright 1061c8564c30SJames Wright // -- Output 1062c8564c30SJames Wright PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc)); 1063c8564c30SJames Wright PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len)); 1064c8564c30SJames Wright PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc)); 106554831c5fSJames Wright PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len)); 1066c86b9822SJames Wright PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", 1067c86b9822SJames Wright ctx_y_loc_len, dm_y_loc_len); 1068c8564c30SJames Wright 1069c8564c30SJames Wright // -- Transpose 1070c8564c30SJames Wright if (Y_loc_transpose) { 1071c8564c30SJames Wright PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len)); 1072c86b9822SJames Wright PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, 1073c86b9822SJames Wright "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len); 1074c8564c30SJames Wright } 1075c8564c30SJames Wright } 1076c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 1077c8564c30SJames Wright } 1078c8564c30SJames Wright 1079c8564c30SJames Wright /** 1080c8564c30SJames Wright @brief Increment reference counter for `MATCEED` context. 1081c8564c30SJames Wright 1082c8564c30SJames Wright Not collective across MPI processes. 1083c8564c30SJames Wright 1084c8564c30SJames Wright @param[in,out] ctx Context data 1085c8564c30SJames Wright 1086c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 1087c8564c30SJames Wright **/ 1088c8564c30SJames Wright PetscErrorCode MatCeedContextReference(MatCeedContext ctx) { 1089c8564c30SJames Wright PetscFunctionBeginUser; 1090c8564c30SJames Wright ctx->ref_count++; 1091c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 1092c8564c30SJames Wright } 1093c8564c30SJames Wright 1094c8564c30SJames Wright /** 1095c8564c30SJames Wright @brief Copy reference for `MATCEED`. 1096c8564c30SJames Wright Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`. 1097c8564c30SJames Wright 1098c8564c30SJames Wright Not collective across MPI processes. 1099c8564c30SJames Wright 1100c8564c30SJames Wright @param[in] ctx Context data 1101c8564c30SJames Wright @param[out] ctx_copy Copy of pointer to context data 1102c8564c30SJames Wright 1103c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 1104c8564c30SJames Wright **/ 1105c8564c30SJames Wright PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) { 1106c8564c30SJames Wright PetscFunctionBeginUser; 1107c8564c30SJames Wright PetscCall(MatCeedContextReference(ctx)); 1108c8564c30SJames Wright PetscCall(MatCeedContextDestroy(*ctx_copy)); 1109c8564c30SJames Wright *ctx_copy = ctx; 1110c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 1111c8564c30SJames Wright } 1112c8564c30SJames Wright 1113c8564c30SJames Wright /** 1114c8564c30SJames Wright @brief Destroy context data for operator application. 1115c8564c30SJames Wright 1116c8564c30SJames Wright Collective across MPI processes. 1117c8564c30SJames Wright 1118c8564c30SJames Wright @param[in,out] ctx Context data for operator evaluation 1119c8564c30SJames Wright 1120c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 1121c8564c30SJames Wright **/ 1122c8564c30SJames Wright PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) { 1123c8564c30SJames Wright PetscFunctionBeginUser; 1124c8564c30SJames Wright if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS); 1125c8564c30SJames Wright 1126c8564c30SJames Wright // PETSc objects 1127c8564c30SJames Wright PetscCall(DMDestroy(&ctx->dm_x)); 1128c8564c30SJames Wright PetscCall(DMDestroy(&ctx->dm_y)); 1129c8564c30SJames Wright PetscCall(VecDestroy(&ctx->X_loc)); 1130c8564c30SJames Wright PetscCall(VecDestroy(&ctx->Y_loc_transpose)); 1131c8564c30SJames Wright PetscCall(MatDestroy(&ctx->mat_assembled_full_internal)); 1132c8564c30SJames Wright PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal)); 1133c8564c30SJames Wright PetscCall(PetscFree(ctx->internal_mat_type)); 1134c8564c30SJames Wright PetscCall(PetscFree(ctx->mats_assembled_full)); 1135c8564c30SJames Wright PetscCall(PetscFree(ctx->mats_assembled_pbd)); 1136c8564c30SJames Wright 1137c8564c30SJames Wright // libCEED objects 113854831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc)); 113954831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc)); 114054831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full)); 114154831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd)); 114254831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult)); 114354831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose)); 1144c8564c30SJames Wright PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed"); 1145c8564c30SJames Wright 1146c8564c30SJames Wright // Deallocate 1147c8564c30SJames Wright ctx->is_destroyed = PETSC_TRUE; // Flag as destroyed in case someone has stale ref 1148c8564c30SJames Wright PetscCall(PetscFree(ctx)); 1149c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 1150c8564c30SJames Wright } 1151c8564c30SJames Wright 1152c8564c30SJames Wright /** 1153c8564c30SJames Wright @brief Compute the diagonal of an operator via libCEED. 1154c8564c30SJames Wright 1155c8564c30SJames Wright Collective across MPI processes. 1156c8564c30SJames Wright 1157c8564c30SJames Wright @param[in] A `MATCEED` 1158c8564c30SJames Wright @param[out] D Vector holding operator diagonal 1159c8564c30SJames Wright 1160c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 1161c8564c30SJames Wright **/ 1162c8564c30SJames Wright PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) { 1163c8564c30SJames Wright PetscMemType mem_type; 1164c8564c30SJames Wright Vec D_loc; 1165c8564c30SJames Wright MatCeedContext ctx; 1166c8564c30SJames Wright 1167c8564c30SJames Wright PetscFunctionBeginUser; 1168c8564c30SJames Wright PetscCall(MatShellGetContext(A, &ctx)); 1169c8564c30SJames Wright 1170c8564c30SJames Wright // Place PETSc vector in libCEED vector 1171c8564c30SJames Wright PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc)); 1172d0593705SJames Wright PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc)); 1173c8564c30SJames Wright 1174c8564c30SJames Wright // Compute Diagonal 117554831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE)); 1176c8564c30SJames Wright 1177c8564c30SJames Wright // Restore PETSc vector 1178d0593705SJames Wright PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc)); 1179c8564c30SJames Wright 1180c8564c30SJames Wright // Local-to-Global 1181c8564c30SJames Wright PetscCall(VecZeroEntries(D)); 1182c8564c30SJames Wright PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D)); 1183c8564c30SJames Wright PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc)); 1184c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 1185c8564c30SJames Wright } 1186c8564c30SJames Wright 1187c8564c30SJames Wright /** 1188c8564c30SJames Wright @brief Compute `A X = Y` for a `MATCEED`. 1189c8564c30SJames Wright 1190c8564c30SJames Wright Collective across MPI processes. 1191c8564c30SJames Wright 1192c8564c30SJames Wright @param[in] A `MATCEED` 1193c8564c30SJames Wright @param[in] X Input PETSc vector 1194c8564c30SJames Wright @param[out] Y Output PETSc vector 1195c8564c30SJames Wright 1196c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 1197c8564c30SJames Wright **/ 1198c8564c30SJames Wright PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) { 1199c8564c30SJames Wright MatCeedContext ctx; 1200c8564c30SJames Wright 1201c8564c30SJames Wright PetscFunctionBeginUser; 1202c8564c30SJames Wright PetscCall(MatShellGetContext(A, &ctx)); 1203c8564c30SJames Wright PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, 0)); 1204c8564c30SJames Wright 1205c8564c30SJames Wright { 1206c8564c30SJames Wright PetscMemType x_mem_type, y_mem_type; 1207c8564c30SJames Wright Vec X_loc = ctx->X_loc, Y_loc; 1208c8564c30SJames Wright 1209c8564c30SJames Wright // Get local vectors 1210c8564c30SJames Wright if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc)); 1211c8564c30SJames Wright PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc)); 1212c8564c30SJames Wright 1213c8564c30SJames Wright // Global-to-local 1214c8564c30SJames Wright PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc)); 1215c8564c30SJames Wright 1216c8564c30SJames Wright // Setup libCEED vectors 1217d0593705SJames Wright PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc)); 1218c8564c30SJames Wright PetscCall(VecZeroEntries(Y_loc)); 1219d0593705SJames Wright PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc)); 1220c8564c30SJames Wright 1221c8564c30SJames Wright // Apply libCEED operator 1222c8564c30SJames Wright PetscCall(PetscLogGpuTimeBegin()); 122354831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE)); 1224c8564c30SJames Wright PetscCall(PetscLogGpuTimeEnd()); 1225c8564c30SJames Wright 1226c8564c30SJames Wright // Restore PETSc vectors 1227d0593705SJames Wright PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc)); 1228d0593705SJames Wright PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc)); 1229c8564c30SJames Wright 1230c8564c30SJames Wright // Local-to-global 1231c8564c30SJames Wright PetscCall(VecZeroEntries(Y)); 1232c8564c30SJames Wright PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y)); 1233c8564c30SJames Wright 1234c8564c30SJames Wright // Restore local vectors, as needed 1235c8564c30SJames Wright if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc)); 1236c8564c30SJames Wright PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc)); 1237c8564c30SJames Wright } 1238c8564c30SJames Wright 1239c8564c30SJames Wright // Log flops 1240c8564c30SJames Wright if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult)); 1241c8564c30SJames Wright else PetscCall(PetscLogFlops(ctx->flops_mult)); 1242c8564c30SJames Wright 1243c8564c30SJames Wright PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, 0)); 1244c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 1245c8564c30SJames Wright } 1246c8564c30SJames Wright 1247c8564c30SJames Wright /** 1248c8564c30SJames Wright @brief Compute `A^T Y = X` for a `MATCEED`. 1249c8564c30SJames Wright 1250c8564c30SJames Wright Collective across MPI processes. 1251c8564c30SJames Wright 1252c8564c30SJames Wright @param[in] A `MATCEED` 1253c8564c30SJames Wright @param[in] Y Input PETSc vector 1254c8564c30SJames Wright @param[out] X Output PETSc vector 1255c8564c30SJames Wright 1256c8564c30SJames Wright @return An error code: 0 - success, otherwise - failure 1257c8564c30SJames Wright **/ 1258c8564c30SJames Wright PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) { 1259c8564c30SJames Wright MatCeedContext ctx; 1260c8564c30SJames Wright 1261c8564c30SJames Wright PetscFunctionBeginUser; 1262c8564c30SJames Wright PetscCall(MatShellGetContext(A, &ctx)); 1263c8564c30SJames Wright PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, 0)); 1264c8564c30SJames Wright 1265c8564c30SJames Wright { 1266c8564c30SJames Wright PetscMemType x_mem_type, y_mem_type; 1267c8564c30SJames Wright Vec X_loc, Y_loc = ctx->Y_loc_transpose; 1268c8564c30SJames Wright 1269c8564c30SJames Wright // Get local vectors 1270c8564c30SJames Wright if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc)); 1271c8564c30SJames Wright PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc)); 1272c8564c30SJames Wright 1273c8564c30SJames Wright // Global-to-local 1274c8564c30SJames Wright PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc)); 1275c8564c30SJames Wright 1276c8564c30SJames Wright // Setup libCEED vectors 1277d0593705SJames Wright PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc)); 1278c8564c30SJames Wright PetscCall(VecZeroEntries(X_loc)); 1279d0593705SJames Wright PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc)); 1280c8564c30SJames Wright 1281c8564c30SJames Wright // Apply libCEED operator 1282c8564c30SJames Wright PetscCall(PetscLogGpuTimeBegin()); 128354831c5fSJames Wright PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE)); 1284c8564c30SJames Wright PetscCall(PetscLogGpuTimeEnd()); 1285c8564c30SJames Wright 1286c8564c30SJames Wright // Restore PETSc vectors 1287d0593705SJames Wright PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc)); 1288d0593705SJames Wright PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc)); 1289c8564c30SJames Wright 1290c8564c30SJames Wright // Local-to-global 1291c8564c30SJames Wright PetscCall(VecZeroEntries(X)); 1292c8564c30SJames Wright PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X)); 1293c8564c30SJames Wright 1294c8564c30SJames Wright // Restore local vectors, as needed 1295c8564c30SJames Wright if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc)); 1296c8564c30SJames Wright PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc)); 1297c8564c30SJames Wright } 1298c8564c30SJames Wright 1299c8564c30SJames Wright // Log flops 1300c8564c30SJames Wright if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose)); 1301c8564c30SJames Wright else PetscCall(PetscLogFlops(ctx->flops_mult_transpose)); 1302c8564c30SJames Wright 1303c8564c30SJames Wright PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, 0)); 1304c8564c30SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 1305c8564c30SJames Wright } 1306