/// @file
/// MatCeed and its related operators

#include <ceed-utils.h>
#include <ceed.h>
#include <ceed/backend.h>
#include <mat-ceed-impl.h>
#include <mat-ceed.h>
#include <petscdmplex.h>
#include <stdlib.h>
#include <string.h>

// Class id and log events shared by all MATCEED instances
PetscClassId  MATCEED_CLASSID;
PetscLogEvent MATCEED_MULT, MATCEED_MULT_TRANSPOSE;

/**
  @brief Register MATCEED log events.

  Not collective across MPI processes.

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedRegisterLogEvents() {
  // Guard so the class id and events are registered exactly once per process
  static bool registered = false;

  PetscFunctionBeginUser;
  if (registered) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscClassIdRegister("MATCEED", &MATCEED_CLASSID));
  PetscCall(PetscLogEventRegister("MATCEED Mult", MATCEED_CLASSID, &MATCEED_MULT));
  PetscCall(PetscLogEventRegister("MATCEED Mult Transpose", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
  registered = true;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Setup inner `Mat` for `PC` operations not directly supported by libCEED.

  Collective across MPI processes.
  @param[in]   mat_ceed   `MATCEED` to setup
  @param[out]  mat_inner  Inner `Mat`

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedSetupInnerMat(Mat mat_ceed, Mat *mat_inner) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Assembly into a single sparse matrix only makes sense for square operators on one DM
  PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "PC only supported for MATCEED on a single DM");

  // Check cl mat type
  {
    PetscBool is_internal_mat_type_cl = PETSC_FALSE;
    char      internal_mat_type_cl[64];

    // Check for specific CL inner mat type for this Mat
    // The command line option (scoped by this Mat's options prefix) overrides the stored type
    {
      const char *mat_ceed_prefix = NULL;

      PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
      PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
      PetscCall(PetscOptionsFList("-ceed_inner_mat_type", "MATCEED inner assembled MatType for PC support", NULL, MatList, internal_mat_type_cl,
                                  internal_mat_type_cl, sizeof(internal_mat_type_cl), &is_internal_mat_type_cl));
      PetscOptionsEnd();
      if (is_internal_mat_type_cl) {
        // Replace the stored inner mat type with the command line choice
        PetscCall(PetscFree(ctx->internal_mat_type));
        PetscCall(PetscStrallocpy(internal_mat_type_cl, &ctx->internal_mat_type));
      }
    }
  }

  // Create sparse matrix
  // Temporarily switch the DM mat type so DMCreateMatrix() builds the requested inner type, then restore it
  {
    MatType dm_mat_type, dm_mat_type_copy;

    // Copy the current type string; DMSetMatType() may invalidate the borrowed pointer
    PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(ctx->dm_x, ctx->internal_mat_type));
    PetscCall(DMCreateMatrix(ctx->dm_x, mat_inner));
    PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
    The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
    The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail.

  Collective across MPI processes.

  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  // The symbolic (sparsity) setup is done once per target Mat and cached in ctx->mats_assembled_pbd
  {
    PetscInt index = -1;

    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // NOTE(review): freed with free(), which assumes IntArrayCeedToPetsc allocates with malloc — confirm in ceed-utils
      free(rows_petsc);
      free(cols_petsc);
      // Values vector is shared by all PBD-assembled mats; create on first use
      if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
      ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Pick the memory space to read assembled values from, based on the target Mat backend
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag, if known, from the shell Mat to the assembled Mat
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Assemble inner `Mat` for diagonal `PC` operations

  Collective across MPI processes.
  @param[in]   mat_ceed      `MATCEED` to invert
  @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
  @param[out]  mat_inner     Inner `Mat` for diagonal operations; borrowed reference owned by the context

  @return An error code: 0 - success, otherwise - failure
**/
static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));
  if (use_ceed_pbd) {
    // Create the internal point block diagonal mat on first use
    if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_pbd_internal));

    // Assemble mat_assembled_pbd_internal via libCEED point block diagonal assembly
    PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
    if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
  } else {
    // Create the internal fully assembled mat on first use
    if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedSetupInnerMat(mat_ceed, &ctx->mat_assembled_full_internal));

    // Assemble mat_assembled_full_internal
    PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
    if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Get `MATCEED` diagonal block for Jacobi.

  Collective across MPI processes.
200 201 @param[in] mat_ceed `MATCEED` to invert 202 @param[out] mat_block The diagonal block matrix 203 204 @return An error code: 0 - success, otherwise - failure 205 **/ 206 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) { 207 Mat mat_inner = NULL; 208 MatCeedContext ctx; 209 210 PetscFunctionBeginUser; 211 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 212 213 // Assemble inner mat if needed 214 PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner)); 215 216 // Get block diagonal 217 PetscCall(MatGetDiagonalBlock(mat_inner, mat_block)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 /** 222 @brief Invert `MATCEED` diagonal block for Jacobi. 223 224 Collective across MPI processes. 225 226 @param[in] mat_ceed `MATCEED` to invert 227 @param[out] values The block inverses in column major order 228 229 @return An error code: 0 - success, otherwise - failure 230 **/ 231 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) { 232 Mat mat_inner = NULL; 233 MatCeedContext ctx; 234 235 PetscFunctionBeginUser; 236 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 237 238 // Assemble inner mat if needed 239 PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner)); 240 241 // Invert PB diagonal 242 PetscCall(MatInvertBlockDiagonal(mat_inner, values)); 243 PetscFunctionReturn(PETSC_SUCCESS); 244 } 245 246 /** 247 @brief Invert `MATCEED` variable diagonal block for Jacobi. 248 249 Collective across MPI processes. 
250 251 @param[in] mat_ceed `MATCEED` to invert 252 @param[in] num_blocks The number of blocks on the process 253 @param[in] block_sizes The size of each block on the process 254 @param[out] values The block inverses in column major order 255 256 @return An error code: 0 - success, otherwise - failure 257 **/ 258 static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) { 259 Mat mat_inner = NULL; 260 MatCeedContext ctx; 261 262 PetscFunctionBeginUser; 263 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 264 265 // Assemble inner mat if needed 266 PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner)); 267 268 // Invert PB diagonal 269 PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values)); 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 // ----------------------------------------------------------------------------- 274 // MatCeed 275 // ----------------------------------------------------------------------------- 276 277 /** 278 @brief Create PETSc `Mat` from libCEED operators. 279 280 Collective across MPI processes. 
  @param[in]   dm_x               Input `DM`
  @param[in]   dm_y               Output `DM`, or NULL to reuse `dm_x`
  @param[in]   op_mult            `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose  `CeedOperator` for transpose evaluation
  @param[out]  mat                New MatCeed

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedCreate(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
  PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
  VecType        vec_type;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatCeedRegisterLogEvents());

  // Collect context data
  PetscCall(DMGetVecType(dm_x, &vec_type));
  {
    Vec X;

    // Global and local input sizes come from a temporary global vector on dm_x
    PetscCall(DMGetGlobalVector(dm_x, &X));
    PetscCall(VecGetSize(X, &X_g_size));
    PetscCall(VecGetLocalSize(X, &X_l_size));
    PetscCall(DMRestoreGlobalVector(dm_x, &X));
  }
  if (dm_y) {
    Vec Y;

    PetscCall(DMGetGlobalVector(dm_y, &Y));
    PetscCall(VecGetSize(Y, &Y_g_size));
    PetscCall(VecGetLocalSize(Y, &Y_l_size));
    PetscCall(DMRestoreGlobalVector(dm_y, &Y));
  } else {
    // No output DM given - square operator on a single DM
    dm_y     = dm_x;
    Y_g_size = X_g_size;
    Y_l_size = X_l_size;
  }
  // Create context
  {
    Vec X_loc, Y_loc_transpose = NULL;

    PetscCall(DMCreateLocalVector(dm_x, &X_loc));
    PetscCall(VecZeroEntries(X_loc));
    if (op_mult_transpose) {
      PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
      PetscCall(VecZeroEntries(Y_loc_transpose));
    }
    // The context takes its own references; drop the local ones afterwards
    PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE, &ctx));
    PetscCall(VecDestroy(&X_loc));
    PetscCall(VecDestroy(&Y_loc_transpose));
  }

  // Create mat
  PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
  PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
  // -- Set block and variable block sizes
  if (dm_x == dm_y) {
    MatType dm_mat_type, dm_mat_type_copy;
    Mat     temp_mat;

    // Build a throwaway AIJ matrix just to read the DM's (variable) block sizes;
    // save and restore the DM mat type around it
    PetscCall(DMGetMatType(dm_x, &dm_mat_type));
    PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
    PetscCall(DMSetMatType(dm_x, MATAIJ));
    PetscCall(DMCreateMatrix(dm_x, &temp_mat));
    PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
    PetscCall(PetscFree(dm_mat_type_copy));

    {
      PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
      const PetscInt *vblock_sizes;

      // -- Get block sizes
      PetscCall(MatGetBlockSize(temp_mat, &block_size));
      PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
      {
        PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};

        // Global max of the local variable block sizes (only index [1], the max, is used)
        for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
        PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
        max_vblock_size = global_min_max[1];
      }

      // -- Copy block sizes
      if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
      if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));

      // -- Check libCEED compatibility
      // PBD assembly requires a single active basis whose component count matches the block size;
      // variable PBD additionally requires num_comp >= the largest variable block
      {
        bool is_composite;

        ctx->is_ceed_pbd_valid  = PETSC_TRUE;
        ctx->is_ceed_vpbd_valid = PETSC_TRUE;
        PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
        if (is_composite) {
          CeedInt       num_sub_operators;
          CeedOperator *sub_operators;

          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
          PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
          for (CeedInt i = 0; i < num_sub_operators; i++) {
            CeedInt                  num_bases, num_comp;
            CeedBasis               *active_bases;
            CeedOperatorAssemblyData assembly_data;

            PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
            PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
            PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
            if (num_bases > 1) {
              ctx->is_ceed_pbd_valid  = PETSC_FALSE;
              ctx->is_ceed_vpbd_valid = PETSC_FALSE;
            }
            if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
            if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
        } else {
          // LCOV_EXCL_START
          CeedInt                  num_bases, num_comp;
          CeedBasis               *active_bases;
          CeedOperatorAssemblyData assembly_data;

          PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
          PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
          PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
          if (num_bases > 1) {
            ctx->is_ceed_pbd_valid  = PETSC_FALSE;
            ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          }
          if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
          if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
          // LCOV_EXCL_STOP
        }
        {
          PetscInt local_is_valid[2], global_is_valid[2];

          // Flags must agree on all ranks; take the global min (logical AND)
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_pbd_valid = global_is_valid[0];
          local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
          PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
          ctx->is_ceed_vpbd_valid = global_is_valid[0];
        }
      }
    }
    PetscCall(MatDestroy(&temp_mat));
  }
  // -- Set internal mat type
  {
    // NOTE(review): this inner vec_type shadows the outer vec_type used for MatShellSetVecType below
    VecType vec_type;
    MatType internal_mat_type = MATAIJ;

    // Default the inner assembled mat type to match the device backend of the local vector
    PetscCall(VecGetType(ctx->X_loc, &vec_type));
    if (strstr(vec_type, VECCUDA)) internal_mat_type = MATAIJCUSPARSE;
    else if (strstr(vec_type, VECKOKKOS)) internal_mat_type = MATAIJKOKKOS;
    else internal_mat_type = MATAIJ;
    PetscCall(PetscStrallocpy(internal_mat_type, &ctx->internal_mat_type));
  }
  // -- Set mat operations
  PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
  PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
  if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
  PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
  PetscCall(MatShellSetVecType(*mat, vec_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Copy `MATCEED` into a compatible `Mat` with type `MatShell` or `MATCEED`.

  Collective across MPI processes.
455 456 @param[in] mat_ceed `MATCEED` to copy from 457 @param[out] mat_other `MatShell` or `MATCEED` to copy into 458 459 @return An error code: 0 - success, otherwise - failure 460 **/ 461 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) { 462 PetscFunctionBeginUser; 463 PetscCall(MatCeedRegisterLogEvents()); 464 465 // Check type compatibility 466 { 467 MatType mat_type_ceed, mat_type_other; 468 469 PetscCall(MatGetType(mat_ceed, &mat_type_ceed)); 470 PetscCheck(!strcmp(mat_type_ceed, MATCEED), PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED); 471 PetscCall(MatGetType(mat_ceed, &mat_type_other)); 472 PetscCheck(!strcmp(mat_type_other, MATCEED) || !strcmp(mat_type_other, MATSHELL), PETSC_COMM_SELF, PETSC_ERR_LIB, 473 "mat_other must have type " MATCEED " or " MATSHELL); 474 } 475 476 // Check dimension compatibility 477 { 478 PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size; 479 480 PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size)); 481 PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size)); 482 PetscCall(MatGetSize(mat_ceed, &Y_g_other_size, &X_g_other_size)); 483 PetscCall(MatGetLocalSize(mat_ceed, &Y_l_other_size, &X_l_other_size)); 484 PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) && 485 (X_l_ceed_size == X_l_other_size), 486 PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, 487 "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT 488 "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT 489 ", %" PetscInt_FMT ")", 490 Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size); 491 } 492 493 // Convert 494 { 495 VecType vec_type; 496 MatCeedContext ctx; 497 498 
PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED)); 499 PetscCall(MatShellGetContext(mat_ceed, &ctx)); 500 PetscCall(MatCeedContextReference(ctx)); 501 PetscCall(MatShellSetContext(mat_other, ctx)); 502 PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy)); 503 PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed)); 504 if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed)); 505 PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed)); 506 PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed)); 507 PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed)); 508 PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed)); 509 { 510 PetscInt block_size; 511 512 PetscCall(MatGetBlockSize(mat_ceed, &block_size)); 513 if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size)); 514 } 515 { 516 PetscInt num_blocks; 517 const PetscInt *block_sizes; 518 519 PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes)); 520 if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes)); 521 } 522 PetscCall(DMGetVecType(ctx->dm_x, &vec_type)); 523 PetscCall(MatShellSetVecType(mat_other, vec_type)); 524 } 525 PetscFunctionReturn(PETSC_SUCCESS); 526 } 527 528 /** 529 @brief Assemble a `MATCEED` into a `MATAIJ` or similar. 530 The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`. 531 The caller is responsible for assuring the global and local sizes are compatible, otherwise this function will fail. 532 533 Collective across MPI processes. 
  @param[in]      mat_ceed  `MATCEED` to assemble
  @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat_ceed, &ctx));

  // Check if COO pattern set
  // The symbolic (sparsity) setup is done once per target Mat and cached in ctx->mats_assembled_full
  {
    PetscInt index = -1;

    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == mat_coo) index = i;
    }
    if (index == -1) {
      PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
      CeedInt      *rows_ceed, *cols_ceed;
      PetscCount    num_entries;
      PetscLogStage stage_amg_setup;

      // -- Assemble sparsity pattern if mat hasn't been assembled before
      PetscCall(PetscLogStageGetId("MATCEED Assembly Setup", &stage_amg_setup));
      if (stage_amg_setup == -1) {
        PetscCall(PetscLogStageRegister("MATCEED Assembly Setup", &stage_amg_setup));
      }
      PetscCall(PetscLogStagePush(stage_amg_setup));
      PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
      PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
      PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
      PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
      // NOTE(review): freed with free(), which assumes IntArrayCeedToPetsc allocates with malloc — confirm in ceed-utils
      free(rows_petsc);
      free(cols_petsc);
      // Values vector is shared by all fully-assembled mats; create on first use
      if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
      PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
      ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
      PetscCall(PetscLogStagePop());
    }
  }

  // Assemble mat_ceed
  PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
  {
    const CeedScalar *values;
    MatType           mat_type;
    CeedMemType       mem_type = CEED_MEM_HOST;
    PetscBool         is_spd, is_spd_known;

    // Pick the memory space to read assembled values from, based on the target Mat backend
    PetscCall(MatGetType(mat_coo, &mat_type));
    if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
    else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
    else mem_type = CEED_MEM_HOST;

    PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
    PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
    PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
    // Propagate the SPD flag, if known, from the shell Mat to the assembled Mat
    PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
    if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
    PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
  }
  PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Set user context for a `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat  `MATCEED`
  @param[in]      f    The context destroy function, or NULL
  @param[in]      ctx  User context, or NULL to unset

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
  PetscContainer user_ctx = NULL;

  PetscFunctionBeginUser;
  if (ctx) {
    // Wrap the user pointer in a PetscContainer so it can be composed with the Mat
    PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
    PetscCall(PetscContainerSetPointer(user_ctx, ctx));
    PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
  }
  // Composing NULL removes any previously set context
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
  // The Mat holds its own reference; drop the local one
  PetscCall(PetscContainerDestroy(&user_ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Retrieve the user context for a `MATCEED`.

  Collective across MPI processes.
  @param[in,out]  mat  `MATCEED`
  @param[out]     ctx  User context, set to NULL if no user context has been composed

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
  PetscContainer user_ctx;

  PetscFunctionBeginUser;
  PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
  // ctx is used as a pointer-to-pointer destination
  if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
  else *(void **)ctx = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Sets the inner matrix type as a string for the `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat   `MATCEED`
  @param[in]      type  Inner `MatType` to set

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetInnerMatType(Mat mat, MatType type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // Check if same; no-op when the requested type matches the current one
  {
    size_t    len_old, len_new;
    PetscBool is_same = PETSC_FALSE;

    PetscCall(PetscStrlen(ctx->internal_mat_type, &len_old));
    PetscCall(PetscStrlen(type, &len_new));
    if (len_old == len_new) PetscCall(PetscStrncmp(ctx->internal_mat_type, type, len_old, &is_same));
    if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
  }
  // Clean up old mats in different format
  // The internal assembled mats are removed from the tracking arrays and destroyed
  // LCOV_EXCL_START
  if (ctx->mat_assembled_full_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
      if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
        // Shift the remaining entries down over the removed slot
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
          ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
        }
        ctx->num_mats_assembled_full--;
        // Note: we'll realloc this array again, so no need to shrink the allocation
        PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
      }
    }
  }
  if (ctx->mat_assembled_pbd_internal) {
    for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
      if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
        // Shift the remaining entries down over the removed slot
        for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
          ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
        }
        // Note: we'll realloc this array again, so no need to shrink the allocation
        ctx->num_mats_assembled_pbd--;
        PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
      }
    }
  }
  PetscCall(PetscFree(ctx->internal_mat_type));
  PetscCall(PetscStrallocpy(type, &ctx->internal_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
  // LCOV_EXCL_STOP
}

/**
  @brief Gets the inner matrix type as a string from the `MATCEED`.

  Collective across MPI processes.

  @param[in,out]  mat   `MATCEED`
  @param[out]     type  Inner `MatType`; borrowed reference, do not free

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedGetInnerMatType(Mat mat, MatType *type) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  *type = ctx->internal_mat_type;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Set a user defined matrix operation for a `MATCEED` matrix.

  Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
  `MatCeedSetContext()`.

  Collective across MPI processes.

  @param[in,out]  mat  `MATCEED`
  @param[in]      op   Name of the `MatOperation`
  @param[in]      g    Function that provides the operation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
  PetscFunctionBeginUser;
  // Thin wrapper over the shell matrix operation table
  PetscCall(MatShellSetOperation(mat, op, g));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
745 746 Not collective across MPI processes. 747 748 @param[in,out] mat `MATCEED` 749 @param[in] X_loc Input PETSc local vector, or NULL 750 @param[in] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 751 752 @return An error code: 0 - success, otherwise - failure 753 **/ 754 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) { 755 MatCeedContext ctx; 756 757 PetscFunctionBeginUser; 758 PetscCall(MatShellGetContext(mat, &ctx)); 759 if (X_loc) { 760 PetscInt len_old, len_new; 761 762 PetscCall(VecGetSize(ctx->X_loc, &len_old)); 763 PetscCall(VecGetSize(X_loc, &len_new)); 764 PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT, 765 len_new, len_old); 766 PetscCall(VecDestroy(&ctx->X_loc)); 767 ctx->X_loc = X_loc; 768 PetscCall(PetscObjectReference((PetscObject)X_loc)); 769 } 770 if (Y_loc_transpose) { 771 PetscInt len_old, len_new; 772 773 PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old)); 774 PetscCall(VecGetSize(Y_loc_transpose, &len_new)); 775 PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, 776 "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old); 777 PetscCall(VecDestroy(&ctx->Y_loc_transpose)); 778 ctx->Y_loc_transpose = Y_loc_transpose; 779 PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose)); 780 } 781 PetscFunctionReturn(PETSC_SUCCESS); 782 } 783 784 /** 785 @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 786 787 Not collective across MPI processes. 
788 789 @param[in,out] mat `MATCEED` 790 @param[out] X_loc Input PETSc local vector, or NULL 791 @param[out] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 792 793 @return An error code: 0 - success, otherwise - failure 794 **/ 795 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) { 796 MatCeedContext ctx; 797 798 PetscFunctionBeginUser; 799 PetscCall(MatShellGetContext(mat, &ctx)); 800 if (X_loc) { 801 *X_loc = ctx->X_loc; 802 PetscCall(PetscObjectReference((PetscObject)*X_loc)); 803 } 804 if (Y_loc_transpose) { 805 *Y_loc_transpose = ctx->Y_loc_transpose; 806 PetscCall(PetscObjectReference((PetscObject)*Y_loc_transpose)); 807 } 808 PetscFunctionReturn(PETSC_SUCCESS); 809 } 810 811 /** 812 @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 813 814 Not collective across MPI processes. 815 816 @param[in,out] mat MatCeed 817 @param[out] X_loc Input PETSc local vector, or NULL 818 @param[out] Y_loc_transpose Input PETSc local vector for transpose operation, or NULL 819 820 @return An error code: 0 - success, otherwise - failure 821 **/ 822 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) { 823 PetscFunctionBeginUser; 824 if (X_loc) PetscCall(VecDestroy(X_loc)); 825 if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose)); 826 PetscFunctionReturn(PETSC_SUCCESS); 827 } 828 829 /** 830 @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 831 832 Not collective across MPI processes. 
833 834 @param[in,out] mat MatCeed 835 @param[out] op_mult libCEED `CeedOperator` for `MatMult()`, or NULL 836 @param[out] op_mult_transpose libCEED `CeedOperator` for `MatMultTranspose()`, or NULL 837 838 @return An error code: 0 - success, otherwise - failure 839 **/ 840 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) { 841 MatCeedContext ctx; 842 843 PetscFunctionBeginUser; 844 PetscCall(MatShellGetContext(mat, &ctx)); 845 if (op_mult) { 846 *op_mult = NULL; 847 PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult)); 848 } 849 if (op_mult_transpose) { 850 *op_mult_transpose = NULL; 851 PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose)); 852 } 853 PetscFunctionReturn(PETSC_SUCCESS); 854 } 855 856 /** 857 @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations. 858 859 Not collective across MPI processes. 860 861 @param[in,out] mat MatCeed 862 @param[out] op_mult libCEED `CeedOperator` for `MatMult()`, or NULL 863 @param[out] op_mult_transpose libCEED `CeedOperator` for `MatMultTranspose()`, or NULL 864 865 @return An error code: 0 - success, otherwise - failure 866 **/ 867 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) { 868 MatCeedContext ctx; 869 870 PetscFunctionBeginUser; 871 PetscCall(MatShellGetContext(mat, &ctx)); 872 if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult)); 873 if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose)); 874 PetscFunctionReturn(PETSC_SUCCESS); 875 } 876 877 /** 878 @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators. 879 880 Not collective across MPI processes. 

  @param[in,out]  mat                       MatCeed
  @param[in]      log_event_mult            `PetscLogEvent` for forward evaluation, or 0 to leave the current event unchanged
  @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or 0 to leave the current event unchanged

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  // A zero event is treated as "not provided" and leaves the stored event in place
  if (log_event_mult) ctx->log_event_mult = log_event_mult;
  if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.

  Not collective across MPI processes.

  @param[in,out]  mat                       MatCeed
  @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL to skip
  @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL to skip

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(mat, &ctx));
  if (log_event_mult) *log_event_mult = ctx->log_event_mult;
  if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
  PetscFunctionReturn(PETSC_SUCCESS);
}

// -----------------------------------------------------------------------------
// Operator context data
// -----------------------------------------------------------------------------

/**
  @brief Setup context data for operator application.

  Collective across MPI processes.

  @param[in]   dm_x                      Input `DM`
  @param[in]   dm_y                      Output `DM`
  @param[in]   X_loc                     Input PETSc local vector, or NULL
  @param[in]   Y_loc_transpose           Input PETSc local vector for transpose operation, or NULL
  @param[in]   op_mult                   `CeedOperator` for forward evaluation
  @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation
  @param[in]   log_event_mult            `PetscLogEvent` for forward evaluation
  @param[in]   log_event_mult_transpose  `PetscLogEvent` for transpose evaluation
  @param[out]  ctx                       Context data for operator evaluation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
                                    PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, MatCeedContext *ctx) {
  CeedSize x_loc_len, y_loc_len;

  PetscFunctionBeginUser;

  // Allocate; the context starts with a single reference, held by the caller
  PetscCall(PetscNew(ctx));
  (*ctx)->ref_count = 1;

  // Logging
  (*ctx)->log_event_mult = log_event_mult;
  (*ctx)->log_event_mult_transpose = log_event_mult_transpose;

  // PETSc objects; the context takes its own reference to each non-NULL object
  PetscCall(PetscObjectReference((PetscObject)dm_x));
  (*ctx)->dm_x = dm_x;
  PetscCall(PetscObjectReference((PetscObject)dm_y));
  (*ctx)->dm_y = dm_y;
  if (X_loc) PetscCall(PetscObjectReference((PetscObject)X_loc));
  (*ctx)->X_loc = X_loc;
  if (Y_loc_transpose) PetscCall(PetscObjectReference((PetscObject)Y_loc_transpose));
  (*ctx)->Y_loc_transpose = Y_loc_transpose;

  // Memtype - probe a work vector from dm_x to record where its data lives (host or device);
  // used later to decide between host and GPU flop logging
  {
    const PetscScalar *x;
    Vec                X;

    PetscCall(DMGetLocalVector(dm_x, &X));
    PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
    PetscCall(VecRestoreArrayReadAndMemType(X, &x));
    PetscCall(DMRestoreLocalVector(dm_x, &X));
  }

  // libCEED objects
  // NOTE(review): CeedOperatorGetCeed() appears to return a borrowed Ceed, hence the
  // explicit CeedReference() below - confirm against the libCEED API docs
  PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
             "retrieving Ceed context object failed");
  PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
  PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
  PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
  if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
  PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));

  // Flop counting - cache libCEED's flop estimates for PetscLog(Gpu)Flops() during apply
  {
    CeedSize ceed_flops_estimate = 0;

    PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
    (*ctx)->flops_mult = ceed_flops_estimate;
    if (op_mult_transpose) {
      PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
      (*ctx)->flops_mult_transpose = ceed_flops_estimate;
    }
  }

  // Check sizes - cross-validate operator, DM, and user-vector local lengths;
  // skipped entirely when the operator reports no active vector lengths
  if (x_loc_len > 0 || y_loc_len > 0) {
    CeedSize ctx_x_loc_len, ctx_y_loc_len;
    PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
    Vec      dm_X_loc, dm_Y_loc;

    // -- Input
    PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
    PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
    PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
    if (X_loc) {
      PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
      PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
    }
    PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
               x_loc_len, dm_x_loc_len);
    PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
               x_loc_len, ctx_x_loc_len);

    // -- Output
    PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
    PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
    PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
    PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
    PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
               ctx_y_loc_len, dm_y_loc_len);

    // -- Transpose
    if (Y_loc_transpose) {
      PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
      PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
                 "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Increment reference counter for `MATCEED` context.

  Not collective across MPI processes.

  @param[in,out]  ctx  Context data

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  ctx->ref_count++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Copy reference for `MATCEED`.
  Note: If `ctx_copy` is non-null, it is assumed to be a valid pointer to a `MatCeedContext`.

  Not collective across MPI processes.

  @param[in]   ctx       Context data
  @param[out]  ctx_copy  Copy of pointer to context data

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
  PetscFunctionBeginUser;
  // Take the new reference before releasing the old target, so self-assignment is safe
  PetscCall(MatCeedContextReference(ctx));
  PetscCall(MatCeedContextDestroy(*ctx_copy));
  *ctx_copy = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Destroy context data for operator application.

  Collective across MPI processes.

  @param[in,out]  ctx  Context data for operator evaluation

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
  PetscFunctionBeginUser;
  // Tolerate NULL; only tear down when the last reference is released
  if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);

  // PETSc objects
  PetscCall(DMDestroy(&ctx->dm_x));
  PetscCall(DMDestroy(&ctx->dm_y));
  PetscCall(VecDestroy(&ctx->X_loc));
  PetscCall(VecDestroy(&ctx->Y_loc_transpose));
  PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
  PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
  PetscCall(PetscFree(ctx->internal_mat_type));
  PetscCall(PetscFree(ctx->mats_assembled_full));
  PetscCall(PetscFree(ctx->mats_assembled_pbd));

  // libCEED objects; the Ceed itself is released last because PetscCallCeed needs it
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
  PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
  PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
  PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");

  // Deallocate
  ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
  PetscCall(PetscFree(ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Compute the diagonal of an operator via libCEED.

  Collective across MPI processes.

  @param[in]   A  `MATCEED`
  @param[out]  D  Vector holding operator diagonal

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
  PetscMemType   mem_type;
  Vec            D_loc;
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));

  // Place PETSc vector in libCEED vector
  // NOTE(review): the diagonal is assembled through dm_x/x_loc, which presumes the
  // operator's input and output spaces coincide - verify against the single-DM
  // restriction enforced in MatCeedSetupInnerMat
  PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
  PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));

  // Compute Diagonal
  PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));

  // Restore PETSc vector
  PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));

  // Local-to-Global; D is zeroed since ghost contributions are added
  PetscCall(VecZeroEntries(D));
  PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
  PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Compute `A X = Y` for a `MATCEED`.

  Collective across MPI processes.

  @param[in]   A  `MATCEED`
  @param[in]   X  Input PETSc vector
  @param[out]  Y  Output PETSc vector

  @return An error code: 0 - success, otherwise - failure
**/
PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
  MatCeedContext ctx;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &ctx));
  PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, 0));

  {
    PetscMemType x_mem_type, y_mem_type;
    Vec          X_loc = ctx->X_loc, Y_loc;

    // Get local vectors; a user-provided X_loc is reused, otherwise a work vector is borrowed
    if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
    PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));

    // Global-to-local
    PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));

    // Setup libCEED vectors; Y_loc is zeroed because the operator ADDS into it below
    PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
    PetscCall(VecZeroEntries(Y_loc));
    PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));

    // Apply libCEED operator
    PetscCall(PetscLogGpuTimeBegin());
    PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
    PetscCall(PetscLogGpuTimeEnd());

    // Restore PETSc vectors (hand array ownership back from libCEED)
    PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
    PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));

    // Local-to-global; Y is zeroed since ghost contributions are added
    PetscCall(VecZeroEntries(Y));
    PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));

    // Restore local vectors, as needed
    if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
    PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
  }

  // Log flops on host or device, matching where the data lives
  if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
  else PetscCall(PetscLogFlops(ctx->flops_mult));

  PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/**
  @brief Compute `A^T Y =
X` for a `MATCEED`. 1206 1207 Collective across MPI processes. 1208 1209 @param[in] A `MATCEED` 1210 @param[in] Y Input PETSc vector 1211 @param[out] X Output PETSc vector 1212 1213 @return An error code: 0 - success, otherwise - failure 1214 **/ 1215 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) { 1216 MatCeedContext ctx; 1217 1218 PetscFunctionBeginUser; 1219 PetscCall(MatShellGetContext(A, &ctx)); 1220 PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, 0)); 1221 1222 { 1223 PetscMemType x_mem_type, y_mem_type; 1224 Vec X_loc, Y_loc = ctx->Y_loc_transpose; 1225 1226 // Get local vectors 1227 if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc)); 1228 PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc)); 1229 1230 // Global-to-local 1231 PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc)); 1232 1233 // Setup libCEED vectors 1234 PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc)); 1235 PetscCall(VecZeroEntries(X_loc)); 1236 PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc)); 1237 1238 // Apply libCEED operator 1239 PetscCall(PetscLogGpuTimeBegin()); 1240 PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE)); 1241 PetscCall(PetscLogGpuTimeEnd()); 1242 1243 // Restore PETSc vectors 1244 PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc)); 1245 PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc)); 1246 1247 // Local-to-global 1248 PetscCall(VecZeroEntries(X)); 1249 PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X)); 1250 1251 // Restore local vectors, as needed 1252 if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc)); 1253 PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc)); 1254 } 1255 1256 // Log flops 1257 if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose)); 1258 else PetscCall(PetscLogFlops(ctx->flops_mult_transpose)); 1259 1260 
PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, 0)); 1261 PetscFunctionReturn(PETSC_SUCCESS); 1262 } 1263