#include <petscconf.h>
#include <petscdevice.h>
#include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/

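/*
  Preallocate the two sequential blocks of the parallel matrix: b->A holds the "diagonal"
  portion (columns owned by this rank) and b->B the "off-diagonal" portion. Both are created
  as MATSEQSELLCUDA so their data can reside on the GPU.
*/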
static PetscErrorCode MatMPISELLSetPreallocation_MPISELLCUDA(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
{
  Mat_MPISELL *b = (Mat_MPISELL *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQSELLCUDA matrices. */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatBindToCPU(b->A, B->boundtocpu));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(b->A, MATSEQSELLCUDA));
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
    PetscCall(MatBindToCPU(b->B, B->boundtocpu));
    PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
    PetscCall(MatSetType(b->B, MATSEQSELLCUDA));
  }
  PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
  PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

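/* There are currently no CUDA-specific options to process, so this is a no-op. */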
static PetscErrorCode MatSetFromOptions_MPISELLCUDA(Mat, PetscOptionItems)
{
  return PETSC_SUCCESS;
}

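/*
  Finish assembly through the host MPISELL path, then make sure the local work vector (lvec)
  used to gather off-process entries is a CUDA vector the first time the matrix is fully assembled.
*/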
static PetscErrorCode MatAssemblyEnd_MPISELLCUDA(Mat A, MatAssemblyType mode)
{
  PetscFunctionBegin;
  PetscCall(MatAssemblyEnd_MPISELL(A, mode));
  if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(VecSetType(((Mat_MPISELL *)A->data)->lvec, VECSEQCUDA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

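/* Tear down the MPISELL data, then remove the preallocation method composed on this object. */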
static PetscErrorCode MatDestroy_MPISELLCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatDestroy_MPISELL(A));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

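/*
  Convert a MATMPISELL matrix to MATMPISELLCUDA: duplicate or copy the input as requested,
  switch the default vector type and the sequential sub-matrices to their CUDA variants,
  and install the CUDA-specific entries in the method table.
*/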
PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat B, MatType, MatReuse reuse, Mat *newmat)
{
  Mat_MPISELL *a;
  Mat          A;

  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
  else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
  A             = *newmat;
  A->boundtocpu = PETSC_FALSE;
  PetscCall(PetscFree(A->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype));

  a = (Mat_MPISELL *)A->data;
  if (a->A) PetscCall(MatSetType(a->A, MATSEQSELLCUDA));
  if (a->B) PetscCall(MatSetType(a->B, MATSEQSELLCUDA));
  if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQCUDA));

  A->ops->assemblyend    = MatAssemblyEnd_MPISELLCUDA;
  A->ops->setfromoptions = MatSetFromOptions_MPISELLCUDA;
  A->ops->destroy        = MatDestroy_MPISELLCUDA;

  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLCUDA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

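/* Constructor registered for MATMPISELLCUDA: build a plain MPISELL matrix and convert it in place. */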
PETSC_EXTERN PetscErrorCode MatCreate_MPISELLCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPISELL(A));
  PetscCall(MatConvert_MPISELL_MPISELLCUDA(A, MATMPISELLCUDA, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateSELLCUDA - Creates a sparse matrix in SELL format.
  This matrix will ultimately be pushed down to NVIDIA GPUs.

  Collective

  Input Parameters:
+ comm  - MPI communicator
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
. n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax. For square matrices `n` is almost always `m`.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or `NULL`, if `d_nz` is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e `m`.
           For matrices you plan to factor you must leave room for the diagonal entry and
           put in the entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or `NULL`, if `o_nz` is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e `m`.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  If `d_nnz` (`o_nnz`) is given then `d_nz` (`o_nz`) is ignored

  Specify the preallocated storage with either `d_nz`/`o_nz` or `d_nnz`/`o_nnz` (not both).
  Set `d_nz` and `o_nz` to `PETSC_DEFAULT` and `d_nnz` and `o_nnz` to `NULL` for PETSc to
  control dynamic memory allocation.
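
  Example Usage:
  A minimal sketch; the preallocation counts of 5 diagonal and 2 off-diagonal nonzeros per row
  and the global sizes M and N are placeholders chosen for illustration.
.vb
  Mat A;
  PetscCall(MatCreateSELLCUDA(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, 5, NULL, 2, NULL, &A));
  /* insert values with MatSetValues(), then assemble */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
.ve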

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLCUDA`, `MATSELLCUDA`
@*/
PetscErrorCode MatCreateSELLCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISELLCUDA));
    PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQSELLCUDA));
    PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATSELLCUDA - "sellcuda" = "mpisellcuda" - A matrix type to be used for sparse matrices.

   Sliced ELLPACK matrix type whose data resides on NVIDIA GPUs.

   This matrix type is identical to `MATSEQSELLCUDA` when constructed with a single process communicator,
   and `MATMPISELLCUDA` otherwise.  As a result, for single process communicators,
   `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type mpisellcuda - sets the matrix type to `MATMPISELLCUDA` during a call to `MatSetFromOptions()`

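   Example Usage:
   A minimal sketch showing how this type can be selected at runtime through the options
   database; the global sizes M and N are placeholders.
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
   PetscCall(MatSetFromOptions(A)); /* -mat_type mpisellcuda selects this type */
.ve
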
  Level: beginner

.seealso: `MatCreateSELLCUDA()`, `MATSEQSELLCUDA`, `MatCreateSeqSELLCUDA()`, `MatCUDAFormatOperation()`
M*/