1 /// @file 2 /// MatCeed and it's related operators 3 4 #include <ceed-utils.h> 5 #include <ceed.h> 6 #include <ceed/backend.h> 7 #include <mat-ceed-impl.h> 8 #include <mat-ceed.h> 9 #include <petscdmplex.h> 10 #include <stdlib.h> 11 #include <string.h> 12 13 PetscClassId MATCEED_CLASSID; 14 PetscLogEvent MATCEED_MULT, MATCEED_MULT_TRANSPOSE; 15 16 /** 17 @brief Register MATCEED log events. 18 19 Not collective across MPI processes. 20 21 @return An error code: 0 - success, otherwise - failure 22 **/ 23 static PetscErrorCode MatCeedRegisterLogEvents() { 24 static bool registered = false; 25 26 PetscFunctionBeginUser; 27 if (registered) PetscFunctionReturn(PETSC_SUCCESS); 28 PetscCall(PetscClassIdRegister("MATCEED", &MATCEED_CLASSID)); 29 PetscCall(PetscLogEventRegister("MATCEED Mult", MATCEED_CLASSID, &MATCEED_MULT)); 30 PetscCall(PetscLogEventRegister("MATCEED Mult Transpose", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE)); 31 registered = true; 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 /** 36 @brief Setup inner `Mat` for `PC` operations not directly supported by libCEED. 37 38 Collective across MPI processes. 
39 40 @param[in] mat_ceed `MATCEED` to setup 41 @param[out] mat_inner Inner `Mat` 42 43 @return An error code: 0 - success, otherwise - failure 44 **/ 45 static PetscErrorCode MatCeedSetupInnerMat(Mat mat_ceed, Mat *mat_inner) { 46 MatCeedContext ctx; 47 48 PetscFunctionBeginUser; 49 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 50 51 PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "PC only supported for MATCEED on a single DM"); 52 53 // Check cl mat type 54 { 55 PetscBool is_internal_mat_type_cl = PETSC_FALSE; 56 char internal_mat_type_cl[64]; 57 58 // Check for specific CL inner mat type for this Mat 59 { 60 const char *mat_ceed_prefix = NULL; 61 62 PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix)); 63 PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL); 64 PetscCall(PetscOptionsFList("-ceed_inner_mat_type", "MATCEED inner assembled MatType for PC support", NULL, MatList, internal_mat_type_cl, 65 internal_mat_type_cl, sizeof(internal_mat_type_cl), &is_internal_mat_type_cl)); 66 PetscOptionsEnd(); 67 if (is_internal_mat_type_cl) { 68 PetscCall(PetscFree(ctx->internal_mat_type)); 69 PetscCall(PetscStrallocpy(internal_mat_type_cl, &ctx->internal_mat_type)); 70 } 71 } 72 } 73 74 // Create sparse matrix 75 { 76 MatType dm_mat_type, dm_mat_type_copy; 77 78 PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type)); 79 PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy)); 80 PetscCall(DMSetMatType(ctx->dm_x, ctx->internal_mat_type)); 81 PetscCall(DMCreateMatrix(ctx->dm_x, mat_inner)); 82 PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy)); 83 PetscCall(PetscFree(dm_mat_type_copy)); 84 } 85 PetscFunctionReturn(PETSC_SUCCESS); 86 } 87 88 /** 89 @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar. 90 The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`. 
         The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.

  Collective across MPI processes.

  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear scan over mats already preallocated for point block diagonal assembly
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // NOTE(review): released with free(), not PetscFree() - presumably IntArrayCeedToPetsc allocates with malloc(); confirm against the helper
      free(rows_petsc);
      free(cols_petsc);
      // Reusable CeedVector holding the assembled COO values
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      // Record this mat so the preallocation step is skipped on subsequent calls
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Read assembled values on the device when the target mat type is GPU backed
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag, if known, from the MATCEED to the assembled mat
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Assemble inner `Mat` for diagonal `PC` operations

  Collective across MPI processes.
166 167 @param[in] mat_ceed `MATCEED` to invert 168 @param[in] use_ceed_pbd Boolean flag to use libCEED PBD assembly 169 @param[out] mat_inner Inner `Mat` for diagonal operations 170 171 @return An error code: 0 - success, otherwise - failure 172 **/ 173 static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) { 174 MatCeedContext ctx; 175 176 PetscFunctionBeginUser; 177 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 178 if (use_ceed_pbd) { 179 // Check if COO pattern set 180 if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_pbd_internal)); 181 182 // Assemble mat_assembled_full_internal 183 PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal)); 184 if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal; 185 } else { 186 // Check if COO pattern set 187 if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_full_internal)); 188 189 // Assemble mat_assembled_full_internal 190 PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal)); 191 if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal; 192 } 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 /** 197 @brief Get `MATCEED` diagonal block for Jacobi. 198 199 Collective across MPI processes. 
200 201 @param[in] mat_ceed `MATCEED` to invert 202 @param[out] mat_block The diagonal block matrix 203 204 @return An error code: 0 - success, otherwise - failure 205 **/ 206 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) { 207 Mat mat_inner = NULL; 208 MatCeedContext ctx; 209 210 PetscFunctionBeginUser; 211 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 212 213 // Assemble inner mat if needed 214 PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner)); 215 216 // Get block diagonal 217 PetscCall(MatGetDiagonalBlock(mat_inner, mat_block)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 /** 222 @brief Invert `MATCEED` diagonal block for Jacobi. 223 224 Collective across MPI processes. 225 226 @param[in] mat_ceed `MATCEED` to invert 227 @param[out] values The block inverses in column major order 228 229 @return An error code: 0 - success, otherwise - failure 230 **/ 231 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) { 232 Mat mat_inner = NULL; 233 MatCeedContext ctx; 234 235 PetscFunctionBeginUser; 236 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 237 238 // Assemble inner mat if needed 239 PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner)); 240 241 // Invert PB diagonal 242 PetscCall(MatInvertBlockDiagonal(mat_inner, values)); 243 PetscFunctionReturn(PETSC_SUCCESS); 244 } 245 246 /** 247 @brief Invert `MATCEED` variable diagonal block for Jacobi. 248 249 Collective across MPI processes. 

  @param[in]   mat_ceed     `MATCEED` to invert
  @param[in]   num_blocks   The number of blocks on the process
  @param[in]   block_sizes  The size of each block on the process
  @param[out]  values       The block inverses in column major order

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
  Mat            mat_inner = NULL;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Assemble inner mat if needed
  PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));

  // Invert PB diagonal
  PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// -----------------------------------------------------------------------------
// MatCeed
// -----------------------------------------------------------------------------

/**
  @brief Create PETSc `Mat` from libCEED operators.

  Collective across MPI processes.

  @param[in]   dm_x               Input `DM`
  @param[in]   dm_y               Output `DM`
  @param[in]   op_mult            `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose  `CeedOperator` for transpose evaluation
  @param[out]  mat                New MatCeed

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedCreate(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
  PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
  VecType        vec_type;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatCeedRegisterLogEvents());

  // Collect context data
  PetscCall(DMGetVecType(dm_x, &vec_type));
  {
    Vec X;

    // Borrow a global vector to read off the input sizes
    PetscCall(DMGetGlobalVector(dm_x, &X));
    PetscCall(VecGetSize(X, &X_g_size));
    PetscCall(VecGetLocalSize(X, &X_l_size));
    PetscCall(DMRestoreGlobalVector(dm_x, &X));
  }
  if (dm_y) {
    Vec Y;

    PetscCall(DMGetGlobalVector(dm_y, &Y));
    PetscCall(VecGetSize(Y, &Y_g_size));
    PetscCall(VecGetLocalSize(Y, &Y_l_size));
    PetscCall(DMRestoreGlobalVector(dm_y, &Y));
  } else {
    // Square operator: output DM and sizes match the input
    dm_y     = dm_x;
    Y_g_size = X_g_size;
    Y_l_size = X_l_size;
  }
  // Create context
  {
    Vec X_loc, Y_loc_transpose = NULL;

    PetscCall(DMCreateLocalVector(dm_x, &X_loc));
    PetscCall(VecZeroEntries(X_loc));
    if (op_mult_transpose) {
      PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
      PetscCall(VecZeroEntries(Y_loc_transpose));
    }
    // The context takes its own references; drop the local ones
    PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE, &ctx));
    PetscCall(VecDestroy(&X_loc));
    PetscCall(VecDestroy(&Y_loc_transpose));
  }

  // Create mat
  PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
  PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
  // -- Set block and variable block sizes
  if (dm_x == dm_y) {
    MatType dm_mat_type, dm_mat_type_copy;
    Mat     temp_mat;

    // Temporary AIJ matrix, used only to query the DM's (variable) block sizes;
    // the DM's mat type is saved (copied) and restored around the creation
    PetscCall(DMGetMatType(dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(dm_x, MATAIJ));
    PetscCall(DMCreateMatrix(dm_x, &temp_mat));
    PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));

    {
      PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
      const PetscInt *vblock_sizes;

      // -- Get block sizes
      PetscCall(MatGetBlockSize(temp_mat, &block_size));
      PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
      {
        PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};

        // Global maximum variable block size across all processes
        for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
        PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
        max_vblock_size = global_min_max[1];
      }

      // -- Copy block sizes
      if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
      if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));

      // -- Check libCEED compatibility
      // PBD assembly requires a single active basis per (sub)operator with
      // num_comp equal to the block size; VPBD requires num_comp >= max block size
      {
        bool is_composite;

        ctx->is_ceed_pbd_valid  = PETSC_TRUE;
        ctx->is_ceed_vpbd_valid = PETSC_TRUE;
        PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
        if (is_composite) {
          CeedInt       num_sub_operators;
          CeedOperator *sub_operators;

          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
          for (CeedInt i = 0; i < num_sub_operators; i++) {
            CeedInt                  num_bases, num_comp;
            CeedBasis               *active_bases;
            CeedOperatorAssemblyData assembly_data;

            PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
            PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
            PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
            if (num_bases > 1) {
              ctx->is_ceed_pbd_valid  = PETSC_FALSE;
              ctx->is_ceed_vpbd_valid = PETSC_FALSE;
            }
            if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
            if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
        } else {
          // LCOV_EXCL_START
          CeedInt                  num_bases, num_comp;
          CeedBasis               *active_bases;
          CeedOperatorAssemblyData assembly_data;

          PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
          PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
          PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
          if (num_bases > 1) {
            ctx->is_ceed_pbd_valid  = PETSC_FALSE;
            ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
          if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
          if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          // LCOV_EXCL_STOP
        }
        {
          PetscInt local_is_valid[2], global_is_valid[2];

          // Validity flags must agree globally; take the global minimum (logical AND)
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_pbd_valid = global_is_valid[0];
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_vpbd_valid = global_is_valid[0];
        }
      }
    }
    PetscCall(MatDestroy(&temp_mat));
  }
  // -- Set internal mat type
  {
    // NOTE(review): this inner vec_type shadows the function-scope vec_type above - consider renaming
    VecType vec_type;
    MatType internal_mat_type = MATAIJ;

    // Match the internal assembled mat type to the vector backend
    PetscCall(VecGetType(ctx->X_loc, &vec_type));
    if (strstr(vec_type, VECCUDA)) internal_mat_type = MATAIJCUSPARSE;
    else if (strstr(vec_type, VECKOKKOS)) internal_mat_type = MATAIJKOKKOS;
    else internal_mat_type = MATAIJ;
    PetscCall(PetscStrallocpy(internal_mat_type, &ctx->internal_mat_type));
  }
  // -- Set mat operations
  PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
  PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
  if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
  PetscCall(MatShellSetVecType(*mat, vec_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.

  Collective across MPI processes.
455 456 @param[in] mat_ceed `MATCEED` to copy from 457 @param[out] mat_other `MatShell` or `MATCEED` to copy into 458 459 @return An error code: 0 - success, otherwise - failure 460 **/ 461 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) { 462 PetscFunctionBeginUser; 463 PetscCall(MatCeedRegisterLogEvents()); 464 465 // Check type compatibility 466 { 467 MatType mat_type_ceed, mat_type_other; 468 469 PetscCall(MatGetType(mat_ceed, &mat_type_ceed)); 470 PetscCheck(!strcmp(mat_type_ceed, MATCEED), PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED); 471 PetscCall(MatGetType(mat_ceed, &mat_type_other)); 472 PetscCheck(!strcmp(mat_type_other, MATCEED) || !strcmp(mat_type_other, MATSHELL), PETSC_COMM_SELF, PETSC_ERR_LIB, 473 "mat_other must have type " MATCEED " or " MATSHELL); 474 } 475 476 // Check dimension compatibility 477 { 478 PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size; 479 480 PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size)); 481 PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size)); 482 PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size)); 483 PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size)); 484 PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) && 485 (X_l_ceed_size == X_l_other_size), 486 PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, 487 "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT 488 "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT 489 ", %" PetscInt_FMT ")", 490 Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size); 491 } 492 493 // Convert 494 { 495 VecType vec_type; 496 MatCeedContext ctx; 497 498 
PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED)); 499 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 500 PetscCall(MatCeedContextReference(ctx)); 501 PetscCall(MatShellSetContext(mat_other, ctx)); 502 PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy)); 503 PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed)); 504 if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed)); 505 PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed)); 506 PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed)); 507 PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed)); 508 PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed)); 509 { 510 PetscInt block_size; 511 512 PetscCall(MatGetBlockSize(mat_ceed, &block_size)); 513 if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size)); 514 } 515 { 516 PetscInt num_blocks; 517 const PetscInt *block_sizes; 518 519 PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes)); 520 if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes)); 521 } 522 PetscCall(DMGetVecType(ctx->dm_x, &vec_type)); 523 PetscCall(MatShellSetVecType(mat_other, vec_type)); 524 } 525 PetscFunctionReturn(PETSC_SUCCESS); 526 } 527 528 /** 529 @brief Assemble a `MATCEED` into a `MATAIJ` or similar. 530 The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`. 531 The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail. 532 533 Collective across MPI processes. 

  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  {
    PetscInt index = -1;

    // Linear scan over mats already preallocated for full assembly
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // NOTE(review): released with free(), not PetscFree() - presumably IntArrayCeedToPetsc allocates with malloc(); confirm against the helper
      free(rows_petsc);
      free(cols_petsc);
      // Reusable CeedVector holding the assembled COO values
      if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
      // Record this mat so the preallocation step is skipped on subsequent calls
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
      ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Read assembled values on the device when the target mat type is GPU backed
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag, if known, from the MATCEED to the assembled mat
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Set user context for a `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat  `MATCEED`
  @param[in]      f    The context destroy function, or NULL
  @param[in]      ctx  User context, or NULL to unset

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
  PetscContainer user_ctx = NULL;

  PetscFunctionBeginUser;
  if (ctx) {
    PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
    PetscCall(PetscContainerSetPointer(user_ctx, ctx));
    PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
  }
  // Composing NULL clears any previously set user context
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
  PetscCall(PetscContainerDestroy(&user_ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Retrieve the user context for a `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat  `MATCEED`
  @param[out]     ctx  User context, or NULL if none was set

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
  PetscContainer user_ctx;

  PetscFunctionBeginUser;
  PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
  // ctx is a void ** in disguise; write NULL through it when no context was composed
  if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
  else *(void **)ctx = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Sets the inner matrix type as a string from the `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat   `MATCEED`
  @param[in]      type  Inner `MatType` to set

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetInnerMatType(Mat mat, MatType type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Check if same
  {
    size_t    len_old, len_new;
    PetscBool is_same = PETSC_FALSE;

    // No-op early return when the requested type matches the current one
    PetscCall(PetscStrlen(ctx->internal_mat_type, &len_old));
    PetscCall(PetscStrlen(type, &len_new));
    if (len_old == len_new) PetscCall(PetscStrncmp(ctx->internal_mat_type, type, len_old, &is_same));
    if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
  }
  // Clean up old mats in different format
  // LCOV_EXCL_START
  if (ctx->mat_assembled_full_internal) {
    // Remove the internal mat from the assembled list (shift remaining entries left), then destroy it
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
          ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
        }
        ctx->num_mats_assembled_full--;
        // Note: we'll realloc this array again, so no need to shrink the allocation
        PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
      }
    }
  }
  if (ctx->mat_assembled_pbd_internal) {
    // Same eviction for the point block diagonal internal mat
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
          ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
        }
        // Note: we'll realloc this array again, so no need to shrink the allocation
        ctx->num_mats_assembled_pbd--;
        PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
      }
    }
  }
  PetscCall(PetscFree(ctx->internal_mat_type));
  PetscCall(PetscStrallocpy(type, &ctx->internal_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
  // LCOV_EXCL_STOP
}

/**
  @brief Gets the inner matrix type as a string from the `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat   `MATCEED`
  @param[out]     type  Inner `MatType`

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedGetInnerMatType(Mat mat, MatType *type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Returns a borrowed string owned by the context; caller must not free it
  *type = ctx->internal_mat_type;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Set a user defined matrix operation for a `MATCEED` matrix.

  Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
  `MatCeedSetContext()`.

  Collective across MPI processes.

  @param[in,out]  mat  `MATCEED`
  @param[in]      op   Name of the `MatOperation`
  @param[in]      g    Function that provides the operation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
  PetscFunctionBeginUser;
  // Thin wrapper; MATCEED is a MatShell underneath
  PetscCall(MatShellSetOperation(mat, op, g));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
745 746 Not collective across MPI processes. 747 748 @param[in,out] mat `MATCEED` 749 @param[in] X_loc Input PETSc local vector, or NULL 750 @param[in] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 751 752 @return An error code: 0 - success, otherwise - failure 753 **/ 754 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) { 755 MatCeedContext ctx; 756 757 PetscFunctionBeginUser; 758 PetscCall(MatShellGetContext(mat, &ctx)); 759 if (X_loc) { 760 PetscInt len_old, len_new; 761 762 PetscCall(VecGetSize(ctx->X_loc, &len_old)); 763 PetscCall(VecGetSize(X_loc, &len_new)); 764 PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT, 765 len_new, len_old); 766 PetscCall(VecDestroy(&ctx->X_loc)); 767 ctx->X_loc = X_loc; 768 PetscCall(PetscObjectReference((PetscObject)X_loc)); 769 } 770 if (Y_loc_transpose) { 771 PetscInt len_old, len_new; 772 773 PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old)); 774 PetscCall(VecGetSize(Y_loc_transpose, &len_new)); 775 PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, 776 "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old); 777 PetscCall(VecDestroy(&ctx->Y_loc_transpose)); 778 ctx->Y_loc_transpose = Y_loc_transpose; 779 PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose)); 780 } 781 PetscFunctionReturn(PETSC_SUCCESS); 782 } 783 784 /** 785 @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 786 787 Not collective across MPI processes. 
788 789 @param[in,out] mat `MATCEED` 790 @param[out] X_loc Input PETSc local vector, or NULL 791 @param[out] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 792 793 @return An error code: 0 - success, otherwise - failure 794 **/ 795 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) { 796 MatCeedContext ctx; 797 798 PetscFunctionBeginUser; 799 PetscCall(MatShellGetContext(mat, &ctx)); 800 if (X_loc) { 801 *X_loc = ctx->X_loc; 802 PetscCall(PetscObjectReference((PetscObject)*X_loc)); 803 } 804 if (Y_loc_transpose) { 805 *Y_loc_transpose = ctx->Y_loc_transpose; 806 PetscCall(PetscObjectReference((PetscObject)*Y_loc_transpose)); 807 } 808 PetscFunctionReturn(PETSC_SUCCESS); 809 } 810 811 /** 812 @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 813 814 Not collective across MPI processes. 815 816 @param[in,out] mat MatCeed 817 @param[out] X_loc Input PETSc local vector, or NULL 818 @param[out] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 819 820 @return An error code: 0 - success, otherwise - failure 821 **/ 822 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) { 823 PetscFunctionBeginUser; 824 if (X_loc) PetscCall(VecDestroy(X_loc)); 825 if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose)); 826 PetscFunctionReturn(PETSC_SUCCESS); 827 } 828 829 /** 830 @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 831 832 Not collective across MPI processes. 
833 834 @param[in,out] mat MatCeed 835 @param[out] op_mult libCEED `CeedOperator` for `MatMult()`, or NULL 836 @param[out] op_mult_transpose libCEED `CeedOperator` for `MatMultTranspose()`, or NULL 837 838 @return An error code: 0 - success, otherwise - failure 839 **/ 840 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) { 841 MatCeedContext ctx; 842 843 PetscFunctionBeginUser; 844 PetscCall(MatShellGetContext(mat, &ctx)); 845 if (op_mult) { 846 *op_mult = NULL; 847 PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult)); 848 } 849 if (op_mult_transpose) { 850 *op_mult_transpose = NULL; 851 PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose)); 852 } 853 PetscFunctionReturn(PETSC_SUCCESS); 854 } 855 856 /** 857 @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 858 859 Not collective across MPI processes. 860 861 @param[in,out] mat MatCeed 862 @param[out] op_mult libCEED `CeedOperator` for `MatMult()`, or NULL 863 @param[out] op_mult_transpose libCEED `CeedOperator` for `MatMultTranspose()`, or NULL 864 865 @return An error code: 0 - success, otherwise - failure 866 **/ 867 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) { 868 MatCeedContext ctx; 869 870 PetscFunctionBeginUser; 871 PetscCall(MatShellGetContext(mat, &ctx)); 872 if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult)); 873 if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose)); 874 PetscFunctionReturn(PETSC_SUCCESS); 875 } 876 877 /** 878 @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators. 879 880 Not collective across MPI processes. 
881 882 @param[in,out] mat MatCeed 883 @param[out] log_event_mult `PetscLogEvent` for forward evaluation, or NULL 884 @param[out] log_event_mult_transpose `PetscLogEvent` for transpose evaluation, or NULL 885 886 @return An error code: 0 - success, otherwise - failure 887 **/ 888 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) { 889 MatCeedContext ctx; 890 891 PetscFunctionBeginUser; 892 PetscCall(MatShellGetContext(mat, &ctx)); 893 if (log_event_mult) ctx->log_event_mult = log_event_mult; 894 if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose; 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 /** 899 @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators. 900 901 Not collective across MPI processes. 902 903 @param[in,out] mat MatCeed 904 @param[out] log_event_mult `PetscLogEvent` for forward evaluation, or NULL 905 @param[out] log_event_mult_transpose `PetscLogEvent` for transpose evaluation, or NULL 906 907 @return An error code: 0 - success, otherwise - failure 908 **/ 909 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) { 910 MatCeedContext ctx; 911 912 PetscFunctionBeginUser; 913 PetscCall(MatShellGetContext(mat, &ctx)); 914 if (log_event_mult) *log_event_mult = ctx->log_event_mult; 915 if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose; 916 PetscFunctionReturn(PETSC_SUCCESS); 917 } 918 919 // ----------------------------------------------------------------------------- 920 // Operator context data 921 // ----------------------------------------------------------------------------- 922 923 /** 924 @brief Setup context data for operator application. 925 926 Collective across MPI processes. 
  @param[in]   dm_x                      Input `DM`
  @param[in]   dm_y                      Output `DM`
  @param[in]   X_loc                     Input PETSc local vector, or NULL
  @param[in]   Y_loc_transpose           Input PETSc local vector for transpose operation, or NULL
  @param[in]   op_mult                   `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
  @param[in]   log_event_mult            `PetscLogEvent` for forward evaluation
  @param[in]   log_event_mult_transpose  `PetscLogEvent` for transpose evaluation
  @param[out]  ctx                       Context data for operator evaluation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate; the context starts with a single reference held by the caller
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;

  // Logging
  (*ctx)->log_event_mult = log_event_mult;
  (*ctx)->log_event_mult_transpose = log_event_mult_transpose;

  // PETSc objects; the context takes its own reference to each (DMs are required, local vectors optional)
  PetscCall(PetscObjectReference((PetscObject)dm_x));
  (*ctx)->dm_x = dm_x;
  PetscCall(PetscObjectReference((PetscObject)dm_y));
  (*ctx)->dm_y = dm_y;
  if (X_loc) PetscCall(PetscObjectReference((PetscObject)X_loc));
  (*ctx)->X_loc = X_loc;
  if (Y_loc_transpose) PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose));
  (*ctx)->Y_loc_transpose = Y_loc_transpose;

  // Memtype: probe a scratch local vector from dm_x to learn whether data lives on host or device
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects
  // NOTE: PetscCheck (not PetscCallCeed) here because (*ctx)->ceed is not valid until this call succeeds
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  // CeedOperatorGetCeed() returns a borrowed Ceed; take our own reference so the context owns it
  PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting: cache per-application flop estimates for PetscLog(Gpu)Flops in MatMult/MatMultTranspose
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes: operator active vector lengths must agree with the DM local sizes and any user-supplied local vectors
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    if (X_loc) PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "X_loc must match dm_x dimensions");
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op must match dm_x dimensions");
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc must match op dimensions");

    // -- Output
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op must match dm_y dimensions");

    // -- Transpose
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "Y_loc_transpose must match dm_y dimensions");
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Increment reference counter for `MATCEED` context.

  Not collective across MPI processes.

  @param[in,out]  ctx  Context data

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  ctx->ref_count++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Copy reference for `MATCEED`.
  Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.

  Not collective across MPI processes.

  @param[in]   ctx       Context data
  @param[out]  ctx_copy  Copy of pointer to context data

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
  PetscFunctionBeginUser;
  // Reference first, then release the old target, so ctx == *ctx_copy is safe
  PetscCall(MatCeedContextReference(ctx));
  PetscCall(MatCeedContextDestroy(*ctx_copy));
  *ctx_copy = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Destroy context data for operator application.

  Collective across MPI processes.
  @param[in,out]  ctx  Context data for operator evaluation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  // Reference counted: only tear down when the last reference is released (NULL ctx is a no-op)
  if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);

  // PETSc objects
  PetscCall(DMDestroy(&ctx->dm_x));
  PetscCall(DMDestroy(&ctx->dm_y));
  PetscCall(VecDestroy(&ctx->X_loc));
  PetscCall(VecDestroy(&ctx->Y_loc_transpose));
  PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
  PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
  PetscCall(PetscFree(ctx->internal_mat_type));
  PetscCall(PetscFree(ctx->mats_assembled_full));
  PetscCall(PetscFree(ctx->mats_assembled_pbd));

  // libCEED objects; ctx->ceed must remain valid for the PetscCallCeed error paths, so it is destroyed last
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
  // Plain PetscCheck since ctx->ceed is no longer usable for error reporting once destroyed
  PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");

  // Deallocate
  ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
  PetscCall(PetscFree(ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Compute the diagonal of an operator via libCEED.

  Collective across MPI processes.
1106 1107 @param[in] A `MATCEED` 1108 @param[out] D Vector holding operator diagonal 1109 1110 @return An error code: 0 - success, otherwise - failure 1111 **/ 1112 PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) { 1113 PetscMemType mem_type; 1114 Vec D_loc; 1115 MatCeedContext ctx; 1116 1117 PetscFunctionBeginUser; 1118 PetscCall(MatShellGetContext(A, &ctx)); 1119 1120 // Place PETSc vector in libCEED vector 1121 PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc)); 1122 PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc)); 1123 1124 // Compute Diagonal 1125 PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE)); 1126 1127 // Restore PETSc vector 1128 PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc)); 1129 1130 // Local-to-Global 1131 PetscCall(VecZeroEntries(D)); 1132 PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D)); 1133 PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc)); 1134 PetscFunctionReturn(PETSC_SUCCESS); 1135 } 1136 1137 /** 1138 @brief Compute `A X = Y` for a `MATCEED`. 1139 1140 Collective across MPI processes. 
  @param[in]   A  `MATCEED`
  @param[in]   X  Input PETSc vector
  @param[out]  Y  Output PETSc vector

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));
  PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, 0));

  {
    PetscMemType x_mem_type, y_mem_type;
    Vec          X_loc = ctx->X_loc, Y_loc;

    // Get local vectors; scratch vectors are borrowed from the DMs unless the user supplied X_loc
    if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
    PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));

    // Global-to-local
    PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));

    // Setup libCEED vectors: wrap the PETSc local arrays in ctx->x_loc/ctx->y_loc without copying;
    // Y_loc is zeroed because the operator application below accumulates (ApplyAdd)
    PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
    PetscCall(VecZeroEntries(Y_loc));
    PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));

    // Apply libCEED operator
    PetscCall(PetscLogGpuTimeBegin());
    PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogGpuTimeEnd());

    // Restore PETSc vectors (returns the borrowed arrays before PETSc touches them again)
    PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
    PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));

    // Local-to-global
    PetscCall(VecZeroEntries(Y));
    PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));

    // Restore local vectors, as needed
    if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
    PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
  }

  // Log flops on the side (host/device) where the data lives, per the memtype probed at context creation
  if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
  else PetscCall(PetscLogFlops(ctx->flops_mult));

  PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Compute `A^T Y =
X` for a `MATCEED`.

  Collective across MPI processes.

  @param[in]   A  `MATCEED`
  @param[in]   Y  Input PETSc vector
  @param[out]  X  Output PETSc vector

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));
  PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, 0));

  {
    PetscMemType x_mem_type, y_mem_type;
    Vec          X_loc, Y_loc = ctx->Y_loc_transpose;

    // Get local vectors; mirror of MatMult_Ceed with the input/output DM roles swapped
    if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
    PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));

    // Global-to-local
    PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));

    // Setup libCEED vectors: wrap the PETSc local arrays without copying;
    // X_loc is zeroed because the operator application below accumulates (ApplyAdd)
    PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
    PetscCall(VecZeroEntries(X_loc));
    PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));

    // Apply libCEED operator
    PetscCall(PetscLogGpuTimeBegin());
    PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogGpuTimeEnd());

    // Restore PETSc vectors
    PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
    PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));

    // Local-to-global
    PetscCall(VecZeroEntries(X));
    PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));

    // Restore local vectors, as needed
    if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
    PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
  }

  // Log flops on the side (host/device) where the data lives
  if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
  else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));

  PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}