#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJCUSPARSE matrices: the diagonal part b->A and the off-diagonal part b->B. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
PetscErrorCode MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_TXPETSCGPU
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version.
  // In particular, the diagonal-block multiplication kernel is
  // launched in one stream.  Then, in a separate stream, the data
  // transfers from DeviceToHost (with MPI messaging in between),
  // then HostToDevice, are launched.
  // Once the data transfer stream is synchronized, to ensure the
  // messaging is complete, the MatMultAdd kernel is launched in the
  // original (MatMult) stream to protect against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

EXTERN_C_BEGIN
PetscErrorCode MatCreate_MPIAIJ(Mat);
EXTERN_C_END

//PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
PetscErrorCode MatCreate_MPIAIJCUSPARSE(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIAIJ(B);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
                                           MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  B->ops->getvecs = MatGetVecs_MPIAIJCUSPARSE;
  //B->ops->setvaluesbatch = MatSetValuesBatch_MPIAIJCUSPARSE;
#ifdef PETSC_HAVE_TXPETSCGPU
  B->ops->mult = MatMult_MPIAIJCUSPARSE;
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
PetscErrorCode MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   MATAIJCUSPARSE - MATAIJCUSPARSE = "aijcusparse" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process
   communicator, and MATMPIAIJCUSPARSE otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ, MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
M*/
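/*
   A minimal usage sketch (added for illustration; not part of the PETSc sources): it creates an
   AIJCUSPARSE matrix with MatCreateAIJCUSPARSE(), assembles a diagonal, and applies it with
   MatMult().  The preallocation values (one diagonal nonzero per row, no off-diagonal nonzeros)
   and the problem size N are hypothetical choices; a CUDA-enabled PETSc build is assumed.

     #include <petscmat.h>

     int main(int argc,char **argv)
     {
       Mat            A;
       Vec            x,y;
       PetscInt       i,rstart,rend,N = 100;
       PetscScalar    two = 2.0;
       PetscErrorCode ierr;

       ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatCreateAIJCUSPARSE(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,1,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (i=rstart; i<rend; i++) {
         ierr = MatSetValues(A,1,&i,1,&i,&two,INSERT_VALUES);CHKERRQ(ierr);  // 2 on the diagonal
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);   // returns VECCUSP vectors for this matrix type
       ierr = VecSet(x,1.0);CHKERRQ(ierr);
       ierr = MatMult(A,x,y);CHKERRQ(ierr);        // y = A*x, computed on the GPU
       ierr = VecDestroy(&x);CHKERRQ(ierr);
       ierr = VecDestroy(&y);CHKERRQ(ierr);
       ierr = MatDestroy(&A);CHKERRQ(ierr);
       ierr = PetscFinalize();
       return 0;
     }

   Alternatively, an existing MATAIJ code can be moved to the GPU at run time with
   -mat_type aijcusparse (provided the matrix type is set through MatSetFromOptions()),
   in which case the usual MatSeqAIJSetPreallocation()/MatMPIAIJSetPreallocation() and
   MatSetValues() calls apply unchanged.
*/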