12d1451d4SHong Zhang #include <petscconf.h>
28df136f9SHong Zhang #include <petscdevice.h>
32d1451d4SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/
42d1451d4SHong Zhang
MatMPISELLSetPreallocation_MPISELLCUDA(Mat B,PetscInt d_rlenmax,const PetscInt d_rlen[],PetscInt o_rlenmax,const PetscInt o_rlen[])566976f2fSJacob Faibussowitsch static PetscErrorCode MatMPISELLSetPreallocation_MPISELLCUDA(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
62d1451d4SHong Zhang {
72d1451d4SHong Zhang Mat_MPISELL *b = (Mat_MPISELL *)B->data;
82d1451d4SHong Zhang
92d1451d4SHong Zhang PetscFunctionBegin;
102d1451d4SHong Zhang PetscCall(PetscLayoutSetUp(B->rmap));
112d1451d4SHong Zhang PetscCall(PetscLayoutSetUp(B->cmap));
122d1451d4SHong Zhang
132d1451d4SHong Zhang if (!B->preallocated) {
142d1451d4SHong Zhang /* Explicitly create 2 MATSEQSELLCUDA matrices. */
152d1451d4SHong Zhang PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
162d1451d4SHong Zhang PetscCall(MatBindToCPU(b->A, B->boundtocpu));
172d1451d4SHong Zhang PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
182d1451d4SHong Zhang PetscCall(MatSetType(b->A, MATSEQSELLCUDA));
192d1451d4SHong Zhang PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
202d1451d4SHong Zhang PetscCall(MatBindToCPU(b->B, B->boundtocpu));
212d1451d4SHong Zhang PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
222d1451d4SHong Zhang PetscCall(MatSetType(b->B, MATSEQSELLCUDA));
232d1451d4SHong Zhang }
242d1451d4SHong Zhang PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
252d1451d4SHong Zhang PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
262d1451d4SHong Zhang B->preallocated = PETSC_TRUE;
272d1451d4SHong Zhang B->was_assembled = PETSC_FALSE;
282d1451d4SHong Zhang B->assembled = PETSC_FALSE;
292d1451d4SHong Zhang PetscFunctionReturn(PETSC_SUCCESS);
302d1451d4SHong Zhang }
312d1451d4SHong Zhang
MatSetFromOptions_MPISELLCUDA(Mat,PetscOptionItems)32*ce78bad3SBarry Smith static PetscErrorCode MatSetFromOptions_MPISELLCUDA(Mat, PetscOptionItems)
332d1451d4SHong Zhang {
348df136f9SHong Zhang return PETSC_SUCCESS;
352d1451d4SHong Zhang }
362d1451d4SHong Zhang
MatAssemblyEnd_MPISELLCUDA(Mat A,MatAssemblyType mode)3766976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_MPISELLCUDA(Mat A, MatAssemblyType mode)
382d1451d4SHong Zhang {
392d1451d4SHong Zhang PetscFunctionBegin;
402d1451d4SHong Zhang PetscCall(MatAssemblyEnd_MPISELL(A, mode));
418df136f9SHong Zhang if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(VecSetType(((Mat_MPISELL *)A->data)->lvec, VECSEQCUDA));
422d1451d4SHong Zhang PetscFunctionReturn(PETSC_SUCCESS);
432d1451d4SHong Zhang }
442d1451d4SHong Zhang
/*
  MatDestroy_MPISELLCUDA - Destroy an MPISELLCUDA matrix: release the MPISELL
  data, then remove the preallocation method that was composed on the object
  in MatConvert_MPISELL_MPISELLCUDA().
*/
static PetscErrorCode MatDestroy_MPISELLCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatDestroy_MPISELL(A));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
522d1451d4SHong Zhang
MatConvert_MPISELL_MPISELLCUDA(Mat B,MatType,MatReuse reuse,Mat * newmat)538df136f9SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat B, MatType, MatReuse reuse, Mat *newmat)
548df136f9SHong Zhang {
558df136f9SHong Zhang Mat_MPISELL *a;
568df136f9SHong Zhang Mat A;
578df136f9SHong Zhang
588df136f9SHong Zhang PetscFunctionBegin;
598df136f9SHong Zhang PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
608df136f9SHong Zhang if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
618df136f9SHong Zhang else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
628df136f9SHong Zhang A = *newmat;
638df136f9SHong Zhang A->boundtocpu = PETSC_FALSE;
648df136f9SHong Zhang PetscCall(PetscFree(A->defaultvectype));
658df136f9SHong Zhang PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype));
668df136f9SHong Zhang
678df136f9SHong Zhang a = (Mat_MPISELL *)A->data;
688df136f9SHong Zhang if (a->A) PetscCall(MatSetType(a->A, MATSEQSELLCUDA));
698df136f9SHong Zhang if (a->B) PetscCall(MatSetType(a->B, MATSEQSELLCUDA));
708df136f9SHong Zhang if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQCUDA));
718df136f9SHong Zhang
728df136f9SHong Zhang A->ops->assemblyend = MatAssemblyEnd_MPISELLCUDA;
738df136f9SHong Zhang A->ops->setfromoptions = MatSetFromOptions_MPISELLCUDA;
748df136f9SHong Zhang A->ops->destroy = MatDestroy_MPISELLCUDA;
758df136f9SHong Zhang
768df136f9SHong Zhang PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLCUDA));
778df136f9SHong Zhang PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLCUDA));
788df136f9SHong Zhang PetscFunctionReturn(PETSC_SUCCESS);
798df136f9SHong Zhang }
808df136f9SHong Zhang
/*
  MatCreate_MPISELLCUDA - Type constructor for MATMPISELLCUDA: build a plain
  MPISELL matrix, then convert it in place to the CUDA flavor.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISELLCUDA(Mat A)
{
  PetscFunctionBegin;
  /* Initialize the CUDA device before any GPU-typed objects are created. */
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPISELL(A));
  PetscCall(MatConvert_MPISELL_MPISELLCUDA(A, MATMPISELLCUDA, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
892d1451d4SHong Zhang
902d1451d4SHong Zhang /*@
912d1451d4SHong Zhang MatCreateSELLCUDA - Creates a sparse matrix in SELL format.
   This matrix will ultimately be pushed down to NVIDIA GPUs.
932d1451d4SHong Zhang
942d1451d4SHong Zhang Collective
952d1451d4SHong Zhang
962d1451d4SHong Zhang Input Parameters:
972d1451d4SHong Zhang + comm - MPI communicator, set to `PETSC_COMM_SELF`
982d1451d4SHong Zhang . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
992d1451d4SHong Zhang This value should be the same as the local size used in creating the
1002d1451d4SHong Zhang y vector for the matrix-vector product y = Ax.
1012d1451d4SHong Zhang . n - This value should be the same as the local size used in creating the
1022d1451d4SHong Zhang x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1032d1451d4SHong Zhang calculated if `N` is given) For square matrices `n` is almost always `m`.
1042d1451d4SHong Zhang . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
1052d1451d4SHong Zhang . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
1062d1451d4SHong Zhang . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
1072d1451d4SHong Zhang (same value is used for all local rows)
1082d1451d4SHong Zhang . d_nnz - array containing the number of nonzeros in the various rows of the
1092d1451d4SHong Zhang DIAGONAL portion of the local submatrix (possibly different for each row)
1102d1451d4SHong Zhang or `NULL`, if `d_nz` is used to specify the nonzero structure.
1112d1451d4SHong Zhang The size of this array is equal to the number of local rows, i.e `m`.
1122d1451d4SHong Zhang For matrices you plan to factor you must leave room for the diagonal entry and
1132d1451d4SHong Zhang put in the entry even if it is zero.
1142d1451d4SHong Zhang . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
1152d1451d4SHong Zhang submatrix (same value is used for all local rows).
1162d1451d4SHong Zhang - o_nnz - array containing the number of nonzeros in the various rows of the
1172d1451d4SHong Zhang OFF-DIAGONAL portion of the local submatrix (possibly different for
1182d1451d4SHong Zhang each row) or `NULL`, if `o_nz` is used to specify the nonzero
1192d1451d4SHong Zhang structure. The size of this array is equal to the number
1202d1451d4SHong Zhang of local rows, i.e `m`.
1212d1451d4SHong Zhang
1222d1451d4SHong Zhang Output Parameter:
1232d1451d4SHong Zhang . A - the matrix
1242d1451d4SHong Zhang
1252d1451d4SHong Zhang Level: intermediate
1262d1451d4SHong Zhang
1272d1451d4SHong Zhang Notes:
1282d1451d4SHong Zhang If `nnz` is given then `nz` is ignored
1292d1451d4SHong Zhang
1302d1451d4SHong Zhang Specify the preallocated storage with either `nz` or `nnz` (not both).
1312d1451d4SHong Zhang Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
1322d1451d4SHong Zhang allocation.
1332d1451d4SHong Zhang
134be50c303SSatish Balay .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLCUDA`, `MATSELLCUDA`
1352d1451d4SHong Zhang @*/
MatCreateSELLCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat * A)1362d1451d4SHong Zhang PetscErrorCode MatCreateSELLCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
1372d1451d4SHong Zhang {
1382d1451d4SHong Zhang PetscMPIInt size;
1392d1451d4SHong Zhang
1402d1451d4SHong Zhang PetscFunctionBegin;
1412d1451d4SHong Zhang PetscCall(MatCreate(comm, A));
1422d1451d4SHong Zhang PetscCall(MatSetSizes(*A, m, n, M, N));
1432d1451d4SHong Zhang PetscCallMPI(MPI_Comm_size(comm, &size));
1442d1451d4SHong Zhang if (size > 1) {
1452d1451d4SHong Zhang PetscCall(MatSetType(*A, MATMPISELLCUDA));
1462d1451d4SHong Zhang PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
1472d1451d4SHong Zhang } else {
1482d1451d4SHong Zhang PetscCall(MatSetType(*A, MATSEQSELLCUDA));
1492d1451d4SHong Zhang PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
1502d1451d4SHong Zhang }
1512d1451d4SHong Zhang PetscFunctionReturn(PETSC_SUCCESS);
1522d1451d4SHong Zhang }
1532d1451d4SHong Zhang
1542d1451d4SHong Zhang /*MC
1552d1451d4SHong Zhang MATSELLCUDA - "sellcuda" = "mpisellcuda" - A matrix type to be used for sparse matrices.
1562d1451d4SHong Zhang
1572d1451d4SHong Zhang Sliced ELLPACK matrix type whose data resides on NVIDIA GPUs.
1582d1451d4SHong Zhang
1592d1451d4SHong Zhang This matrix type is identical to `MATSEQSELLCUDA` when constructed with a single process communicator,
1602d1451d4SHong Zhang and `MATMPISELLCUDA` otherwise. As a result, for single process communicators,
1612d1451d4SHong Zhang `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
1622d1451d4SHong Zhang for communicators controlling multiple processes. It is recommended that you call both of
1632d1451d4SHong Zhang the above preallocation routines for simplicity.
1642d1451d4SHong Zhang
1652d1451d4SHong Zhang Options Database Key:
1662d1451d4SHong Zhang . -mat_type mpisellcuda - sets the matrix type to `MATMPISELLCUDA` during a call to MatSetFromOptions()
1672d1451d4SHong Zhang
1682d1451d4SHong Zhang Level: beginner
1692d1451d4SHong Zhang
1702d1451d4SHong Zhang .seealso: `MatCreateSELLCUDA()`, `MATSEQSELLCUDA`, `MatCreateSeqSELLCUDA()`, `MatCUDAFormatOperation()`
1712d1451d4SHong Zhang M*/
172