#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include "mpicusparsematimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ         *b              = (Mat_MPIAIJ*)B->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)b->spptr;
  PetscErrorCode     ierr;
  PetscInt           i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJCUSPARSE matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption_SeqAIJCUSPARSE(b->A,cusparseStruct->diagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption_SeqAIJCUSPARSE(b->B,cusparseStruct->offdiagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
PetscErrorCode MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version. In
  // particular, the diagonal-block multiplication kernel is launched
  // in one stream. Then, in a separate stream, the DeviceToHost
  // transfer, the MPI messaging, and the HostToDevice transfer are
  // launched. Once the data-transfer stream is synchronized, which
  // ensures the messaging is complete, the MatMultAdd kernel for the
  // off-diagonal block is launched in the original (MatMult) stream
  // to protect against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
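
/* Illustration of the decomposition used above: for an MPIAIJ-style matrix the
   locally owned part of the product is

       yy_local = A_diag * xx_local + B_offdiag * lvec

   where A_diag (a->A) holds the locally owned columns, B_offdiag (a->B) holds
   the remaining (off-process) columns in compressed form, and lvec is the ghost
   segment of xx gathered by the VecScatter above.  As a small example, if two
   processes each own 2 of the 4 rows/columns of a matrix, rank 0 applies its
   2x2 diagonal block to its 2 local entries of xx and its off-diagonal block to
   the entries of xx received from rank 1. */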

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJCUSPARSE"
PetscErrorCode MatMultTranspose_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version. The
  // off-diagonal-block and diagonal-block transpose kernels are launched
  // on the GPU; the contributions accumulated in the local work vector
  // lvec are then returned to their owning processes with an additive
  // reverse scatter, whose DeviceToHost and HostToDevice transfers (with
  // MPI messaging in between) are handled by the GPU-aware scatter
  // routines.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->rmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,a->lvec,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
PetscErrorCode MatCreate_MPIAIJ(Mat);
EXTERN_C_END
//PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJCUSPARSE"
PetscErrorCode MatSetOption_MPIAIJCUSPARSE(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = MatSetOption_MPIAIJ(A,op,flg);CHKERRQ(ierr);
  switch (op) {
  case MAT_DIAGBLOCK_CSR:
    cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_CSR;
    break;
  case MAT_OFFDIAGBLOCK_CSR:
    cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;
    break;
  case MAT_DIAGBLOCK_DIA:
  case MAT_OFFDIAGBLOCK_DIA:
  case MAT_DIA:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported GPU matrix storage format DIA for (MPI,SEQ)AIJCUSPARSE matrix type.");
  case MAT_DIAGBLOCK_ELL:
    cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_ELL;
    break;
  case MAT_OFFDIAGBLOCK_ELL:
    cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_ELL;
    break;
  case MAT_DIAGBLOCK_HYB:
    cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_HYB;
    break;
  case MAT_OFFDIAGBLOCK_HYB:
    cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_HYB;
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJCUSPARSE"
PetscErrorCode MatSetFromOptions_MPIAIJCUSPARSE(Mat A)
{
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;
  PetscErrorCode     ierr;
  PetscInt           idxDiag=0,idxOffDiag=0;
  const char         *formats[]={CSR,ELL,HYB};
  MatOption          diagFormat=MAT_CSR,offdiagFormat=MAT_CSR;
  PetscBool          flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"When using TxPETSCGPU, MPIAIJCUSPARSE Options","Mat");CHKERRQ(ierr);
  if (A->factortype==MAT_FACTOR_NONE) {
    ierr = PetscOptionsEList("-mat_mult_cusparse_diag_storage_format",
                             "Set the storage format of the diagonal block of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxDiag,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEList("-mat_mult_cusparse_offdiag_storage_format",
                             "Set the storage format of the off-diagonal block of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxOffDiag,&flg);CHKERRQ(ierr);

    /* PetscOptionsEList() returns the index of the selected string, so compare
       indices rather than string pointers. */
    if (idxDiag == 0)      diagFormat = MAT_CSR;
    else if (idxDiag == 1) diagFormat = MAT_ELL;
    else if (idxDiag == 2) diagFormat = MAT_HYB;

    if (idxOffDiag == 0)      offdiagFormat = MAT_CSR;
    else if (idxOffDiag == 1) offdiagFormat = MAT_ELL;
    else if (idxOffDiag == 2) offdiagFormat = MAT_HYB;

    cusparseStruct->diagGPUMatFormat    = diagFormat;
    cusparseStruct->offdiagGPUMatFormat = offdiagFormat;
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJCUSPARSE"
PetscErrorCode MatDestroy_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode     ierr;
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;

  PetscFunctionBegin;
  try {
    delete cusparseStruct;
  } catch(char *ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Mat_MPIAIJCUSPARSE error: %s", ex);
  }
  a->spptr = 0;
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
PetscErrorCode MatCreate_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode     ierr;
  Mat_MPIAIJ         *a;
  Mat_MPIAIJCUSPARSE *cusparseStruct;

  PetscFunctionBegin;
  ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
                                           MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  a        = (Mat_MPIAIJ*)A->data;
  a->spptr = new Mat_MPIAIJCUSPARSE;

  cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;
  cusparseStruct->diagGPUMatFormat    = MAT_DIAGBLOCK_CSR;
  cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;

  A->ops->getvecs        = MatGetVecs_MPIAIJCUSPARSE;
  A->ops->mult           = MatMult_MPIAIJCUSPARSE;
  A->ops->multtranspose  = MatMultTranspose_MPIAIJCUSPARSE;
  A->ops->setfromoptions = MatSetFromOptions_MPIAIJCUSPARSE;
  A->ops->setoption      = MatSetOption_MPIAIJCUSPARSE;
  A->ops->destroy        = MatDestroy_MPIAIJCUSPARSE;

  ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
PetscErrorCode MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJCUSPARSE - MATMPIAIJCUSPARSE = "mpiaijcusparse" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process communicator,
   and MATMPIAIJCUSPARSE otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
   for communicators controlling multiple processes.
   It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ, MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
M*/
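
/* Example usage sketch for MatCreateAIJCUSPARSE().  This is illustrative only:
   the global size N and the per-row preallocation counts 5 and 2 below are
   placeholder values chosen for the example, not values required by this type.

     Mat            A;
     Vec            x,y;
     PetscInt       N = 100;
     PetscErrorCode ierr;

     ierr = MatCreateAIJCUSPARSE(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ... insert values with MatSetValues() and assemble with MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);    <-- returns VECCUSP work vectors
     ierr = MatMult(A,x,y);CHKERRQ(ierr);         <-- SpMV performed through CUSPARSE on the GPU
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/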