#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create the diagonal and off-diagonal blocks as MATSEQAIJCUSPARSE matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
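
/*
   Usage sketch (illustrative only, not part of the library source): how user code
   reaches the preallocation routine above.  The counts 5 and 2 (nonzeros per row in
   the diagonal and off-diagonal blocks) and the local size m are made-up values;
   error checking with CHKERRQ() is omitted for brevity.

       Mat      A;
       PetscInt m = 100;                                        // local number of rows/columns (assumed)

       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
       MatSetType(A,MATMPIAIJCUSPARSE);
       MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);  // dispatches to the routine above
*/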
#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
PetscErrorCode MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_TXPETSCGPU
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the one used on the CPU.
  // In particular, the diagonal-block multiplication kernel is launched
  // in one stream.  Then, in a separate stream, the DeviceToHost transfers,
  // the MPI messaging, and the HostToDevice transfers are launched.  Once
  // the data-transfer stream has been synchronized, to ensure the messaging
  // is complete, the MatMultAdd kernel is launched in the original (MatMult)
  // stream to protect against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

EXTERN_C_BEGIN
PetscErrorCode MatCreate_MPIAIJ(Mat);
EXTERN_C_END
//PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
PetscErrorCode MatCreate_MPIAIJCUSPARSE(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIAIJ(B);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
                                           MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  B->ops->getvecs = MatGetVecs_MPIAIJCUSPARSE;
  //B->ops->setvaluesbatch = MatSetValuesBatch_MPIAIJCUSPARSE;
#ifdef PETSC_HAVE_TXPETSCGPU
  B->ops->mult    = MatMult_MPIAIJCUSPARSE;
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
PetscErrorCode MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   MATAIJCUSPARSE - MATAIJCUSPARSE = "aijcusparse" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process communicator,
   and MATMPIAIJCUSPARSE otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ, MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
M*/
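
/*
   Example (illustrative sketch only, not taken from the PETSc documentation; the
   problem size N and the preallocation counts are arbitrary, and error checking with
   CHKERRQ() is omitted).  A distributed 1-D Laplacian is built through
   MatCreateAIJCUSPARSE() and applied once; on more than one process this exercises
   MatGetVecs_MPIAIJCUSPARSE() and, when PETSC_HAVE_TXPETSCGPU is defined,
   MatMult_MPIAIJCUSPARSE() above.

       Mat      A;
       Vec      x,y;
       PetscInt i,rstart,rend,N = 100;

       MatCreateAIJCUSPARSE(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,3,PETSC_NULL,1,PETSC_NULL,&A);
       MatGetOwnershipRange(A,&rstart,&rend);
       for (i=rstart; i<rend; i++) {
         if (i > 0)   MatSetValue(A,i,i-1,-1.0,INSERT_VALUES);
         MatSetValue(A,i,i,2.0,INSERT_VALUES);
         if (i < N-1) MatSetValue(A,i,i+1,-1.0,INSERT_VALUES);
       }
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

       MatGetVecs(A,&x,&y);           // x and y come back as GPU (VECCUSP) vectors in the MPI case
       VecSet(x,1.0);
       MatMult(A,x,y);                // y = A*x, computed with the CUSPARSE kernels

       VecDestroy(&x);
       VecDestroy(&y);
       MatDestroy(&A);
*/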