#include <petscconf.h>
#include <petscdevice.h>
#include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/

static PetscErrorCode MatMPISELLSetPreallocation_MPISELLCUDA(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
{
  Mat_MPISELL *b = (Mat_MPISELL *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQSELLCUDA matrices. */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatBindToCPU(b->A, B->boundtocpu));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(b->A, MATSEQSELLCUDA));
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
    PetscCall(MatBindToCPU(b->B, B->boundtocpu));
    PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
    PetscCall(MatSetType(b->B, MATSEQSELLCUDA));
  }
  PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
  PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetFromOptions_MPISELLCUDA(Mat, PetscOptionItems)
{
  return PETSC_SUCCESS;
}

static PetscErrorCode MatAssemblyEnd_MPISELLCUDA(Mat A, MatAssemblyType mode)
{
  PetscFunctionBegin;
  PetscCall(MatAssemblyEnd_MPISELL(A, mode));
  if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(VecSetType(((Mat_MPISELL *)A->data)->lvec, VECSEQCUDA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MPISELLCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatDestroy_MPISELL(A));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat B, MatType, MatReuse reuse, Mat *newmat)
{
  Mat_MPISELL *a;
  Mat          A;

  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
  else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
  A             = *newmat;
  A->boundtocpu = PETSC_FALSE;
  PetscCall(PetscFree(A->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype));

  a = (Mat_MPISELL *)A->data;
  if (a->A) PetscCall(MatSetType(a->A, MATSEQSELLCUDA));
  if (a->B) PetscCall(MatSetType(a->B, MATSEQSELLCUDA));
  if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQCUDA));

  A->ops->assemblyend    = MatAssemblyEnd_MPISELLCUDA;
  A->ops->setfromoptions = MatSetFromOptions_MPISELLCUDA;
  A->ops->destroy        = MatDestroy_MPISELLCUDA;

  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLCUDA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPISELLCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPISELL(A));
  PetscCall(MatConvert_MPISELL_MPISELLCUDA(A, MATMPISELLCUDA, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
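
/*
  Usage sketch (illustrative only, not compiled as part of this file):
  applications normally reach MatConvert_MPISELL_MPISELLCUDA() above through
  the public MatConvert() interface rather than calling it directly, e.g.

    Mat A, Acuda;
    // ... create and assemble A as a MATMPISELL matrix ...
    MatConvert(A, MATMPISELLCUDA, MAT_INITIAL_MATRIX, &Acuda); // new GPU copy of A

  With MAT_INPLACE_MATRIX the matrix is instead retyped in place, which is how
  MatCreate_MPISELLCUDA() above bootstraps the CUDA type from MATMPISELL.
*/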
/*@
  MatCreateSELLCUDA - Creates a sparse matrix in SELL format.
  This matrix will ultimately be pushed down to NVIDIA GPUs.

  Collective

  Input Parameters:
+ comm  - MPI communicator, set to `PETSC_COMM_WORLD`
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
. n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax. For square matrices `n` is almost always `m`.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL`, if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e. `m`.
          For matrices you plan to factor you must leave room for the diagonal entry and
          put in the entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e. `m`.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  If `d_nnz` (`o_nnz`) is given then `d_nz` (`o_nz`) is ignored.

  Specify the preallocated storage with either `d_nz`/`o_nz` or `d_nnz`/`o_nnz` (not both).
  Set `d_nz` = `o_nz` = `PETSC_DEFAULT` and `d_nnz` = `o_nnz` = `NULL` for PETSc to control
  dynamic memory allocation.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLCUDA`, `MATSELLCUDA`
@*/
PetscErrorCode MatCreateSELLCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISELLCUDA));
    PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQSELLCUDA));
    PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
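
/*
  Usage sketch for MatCreateSELLCUDA() (the sizes and per-row nonzero counts
  below are illustrative assumptions, not values mandated by the API):

    Mat A;
    // 100x100 global matrix; at most 5 nonzeros per row in the diagonal block
    // and at most 2 per row in the off-diagonal block on every rank
    MatCreateSELLCUDA(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A);
    // ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), MatMult(), ...
    MatDestroy(&A);
*/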
/*MC
  MATSELLCUDA - "sellcuda" = "mpisellcuda" - A matrix type to be used for sparse matrices.

  Sliced ELLPACK matrix type whose data resides on NVIDIA GPUs.

  This matrix type is identical to `MATSEQSELLCUDA` when constructed with a single process communicator,
  and `MATMPISELLCUDA` otherwise. As a result, for single process communicators,
  `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity.

  Options Database Key:
. -mat_type mpisellcuda - sets the matrix type to `MATMPISELLCUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MatCreateSELLCUDA()`, `MATSEQSELLCUDA`, `MatCreateSeqSELLCUDA()`, `MatCUDAFormatOperation()`
M*/
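
/*
  Runtime selection sketch: the option key documented above takes effect when
  an application calls MatSetFromOptions() ("app" below is a placeholder
  executable name):

    Mat A;
    MatCreate(PETSC_COMM_WORLD, &A);
    MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
    MatSetFromOptions(A); // honors -mat_type sellcuda / mpisellcuda from the command line
    // ... preallocate, set values, assemble ...

  mpiexec -n 2 ./app -mat_type sellcuda
*/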