#include "petscconf.h"
PETSC_CUDA_EXTERN_C_BEGIN
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
PETSC_CUDA_EXTERN_C_END
#include "mpicusparsematimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ         *b              = (Mat_MPIAIJ*)B->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)b->spptr;
  PetscErrorCode     ierr;
  PetscInt           i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  if (!B->preallocated) {
    /* Explicitly create the two MATSEQAIJCUSPARSE matrices:
       b->A holds the diagonal block, b->B the off-diagonal block. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption_SeqAIJCUSPARSE(b->A,cusparseStruct->diagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption_SeqAIJCUSPARSE(b->B,cusparseStruct->offdiagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
PetscErrorCode MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    /* right vector lives in the column space of the matrix */
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->cmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    /* left vector lives in the row space of the matrix */
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version.
  // In particular, the diagonal-block multiplication kernel is
  // launched in one stream. Then, in a separate stream, the
  // DeviceToHost transfers (with MPI messaging in between) and
  // the HostToDevice transfers are launched. Once the data
  // transfer stream is synchronized, to ensure messaging is
  // complete, the MatMultAdd kernel for the off-diagonal block
  // is launched in the original (MatMult) stream to protect
  // against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);                            /* diagonal block:     yy  = A_d*xx   */
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);                 /* off-diagonal block: yy += A_o*lvec */
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJCUSPARSE"
PetscErrorCode MatMultTranspose_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // Transpose analogue of the sequence above: the local transpose
  // products for the diagonal and off-diagonal blocks are launched
  // first; the partial results held in lvec are then accumulated
  // into yy with a reverse scatter-add once messaging is complete.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->rmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,a->lvec,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);              /* off-diagonal block: lvec = A_o^T*xx */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);                   /* diagonal block:     yy   = A_d^T*xx */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

//PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJCUSPARSE"
PetscErrorCode MatSetOption_MPIAIJCUSPARSE(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = MatSetOption_MPIAIJ(A,op,flg);CHKERRQ(ierr);
  switch (op) {
  case MAT_DIAGBLOCK_CSR:
    cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_CSR;
    break;
  case MAT_OFFDIAGBLOCK_CSR:
    cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;
    break;
  case MAT_DIAGBLOCK_DIA:
  case MAT_OFFDIAGBLOCK_DIA:
  case MAT_DIA:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported GPU matrix storage format DIA for (MPI,SEQ)AIJCUSPARSE matrix type.");
  case MAT_DIAGBLOCK_ELL:
    cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_ELL;
    break;
  case MAT_OFFDIAGBLOCK_ELL:
    cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_ELL;
    break;
  case MAT_DIAGBLOCK_HYB:
    cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_HYB;
    break;
  case MAT_OFFDIAGBLOCK_HYB:
    cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_HYB;
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJCUSPARSE"
PetscErrorCode MatSetFromOptions_MPIAIJCUSPARSE(Mat A)
{
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;
  PetscErrorCode     ierr;
  PetscInt           idxDiag=0,idxOffDiag=0;
  char               *formats[]={CSR,ELL,HYB};
  MatOption          diagFormat,offdiagFormat;
  PetscBool          flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"When using TxPETSCGPU, MPIAIJCUSPARSE Options","Mat");CHKERRQ(ierr);
  if (A->factortype==MAT_FACTOR_NONE) {
    ierr = PetscOptionsEList("-mat_mult_cusparse_diag_storage_format",
                             "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxDiag,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEList("-mat_mult_cusparse_offdiag_storage_format",
                             "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxOffDiag,&flg);CHKERRQ(ierr);

    /* PetscOptionsEList() returns a zero-based index into formats[] = {CSR,ELL,HYB} */
    switch (idxDiag) {
    case 0:
      diagFormat = MAT_CSR;
      break;
    case 1:
      diagFormat = MAT_ELL;
      break;
    case 2:
      diagFormat = MAT_HYB;
      break;
    }

    switch (idxOffDiag) {
    case 0:
      offdiagFormat = MAT_CSR;
      break;
    case 1:
      offdiagFormat = MAT_ELL;
      break;
    case 2:
      offdiagFormat = MAT_HYB;
      break;
    }
    cusparseStruct->diagGPUMatFormat    = diagFormat;
    cusparseStruct->offdiagGPUMatFormat = offdiagFormat;
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJCUSPARSE"
PetscErrorCode MatDestroy_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode     ierr;
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;

  PetscFunctionBegin;
  try {
    delete cusparseStruct;
  } catch(char *ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Mat_MPIAIJCUSPARSE error: %s", ex);
  }
  a->spptr = 0;
  ierr     = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
PetscErrorCode MatCreate_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode     ierr;
  Mat_MPIAIJ         *a;
  Mat_MPIAIJCUSPARSE *cusparseStruct;

  PetscFunctionBegin;
  ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
                                           MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  a        = (Mat_MPIAIJ*)A->data;
  a->spptr = new Mat_MPIAIJCUSPARSE;

  cusparseStruct                      = (Mat_MPIAIJCUSPARSE*)a->spptr;
  cusparseStruct->diagGPUMatFormat    = MAT_DIAGBLOCK_CSR;
  cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;

  A->ops->getvecs        = MatGetVecs_MPIAIJCUSPARSE;
  A->ops->mult           = MatMult_MPIAIJCUSPARSE;
  A->ops->multtranspose  = MatMultTranspose_MPIAIJCUSPARSE;
  A->ops->setfromoptions = MatSetFromOptions_MPIAIJCUSPARSE;
  A->ops->setoption      = MatSetOption_MPIAIJCUSPARSE;
  A->ops->destroy        = MatDestroy_MPIAIJCUSPARSE;

  ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
PetscErrorCode MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJCUSPARSE - MATMPIAIJCUSPARSE = "mpiaijcusparse" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process communicator,
   and MATMPIAIJCUSPARSE otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ(), MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
M*/
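/*
   A minimal usage sketch (illustrative only, not part of this source file): following the
   recommendation in the manual page above, both preallocation routines are called so the same
   code works on one or many processes. The global size "n" and the fill estimates "d_nz"/"o_nz"
   below are hypothetical placeholders supplied by the application.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);
     ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   Alternatively, MatCreateAIJCUSPARSE() above performs the create/set-type/preallocate steps
   in one call, choosing the sequential or MPI variant from the communicator size.
*/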