140e23c03SJunchao Zhang #if !defined(__SFPACK_H) 240e23c03SJunchao Zhang #define __SFPACK_H 340e23c03SJunchao Zhang 4cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 5cd620004SJunchao Zhang 6cd620004SJunchao Zhang /* We separate SF communications for SFBasic and SFNeighbor in two parts: local (self,intra-rank) and remote (inter-rank) */ 7cd620004SJunchao Zhang typedef enum {PETSCSF_LOCAL=0, PETSCSF_REMOTE} PetscSFScope; 840e23c03SJunchao Zhang 9*fcc7397dSJunchao Zhang /* Optimizations in packing & unpacking for destination ranks. 1040e23c03SJunchao Zhang 11*fcc7397dSJunchao Zhang Suppose there are m indices stored in idx[], and two addresses u, p. We want to do packing: 12*fcc7397dSJunchao Zhang p[i] = u[idx[i]], for i in [0,m) 1340e23c03SJunchao Zhang 14*fcc7397dSJunchao Zhang Indices are associated with n ranks and each rank's indices are stored consecutively in idx[]. 15*fcc7397dSJunchao Zhang We go through indices for each rank and see if they are indices of a 3D submatrix of size [dx,dy,dz] in 16*fcc7397dSJunchao Zhang a parent matrix of size [X,Y,Z], with the submatrix's first index being <start>. 17cd620004SJunchao Zhang 18*fcc7397dSJunchao Zhang E.g., for indices 1,2,3, 6,7,8, 11,12,13, the submatrix size is [3,3,1] with start=1, and the parent matrix's size 19*fcc7397dSJunchao Zhang is [5,3,1]. For simplicity, if any destination rank does not have this pattern, we give up the optimization. 20*fcc7397dSJunchao Zhang 21*fcc7397dSJunchao Zhang Note before using this per-rank optimization, one should check leafcontig[], rootcontig[], which say 22*fcc7397dSJunchao Zhang indices in whole are contiguous, and therefore much more useful than this one when true. 2340e23c03SJunchao Zhang */ 2440e23c03SJunchao Zhang struct _n_PetscSFPackOpt { 25*fcc7397dSJunchao Zhang PetscInt *array; /* [7*n+2] Memory pool for other fields in this struct. Used to easily copy this struct to GPU */ 26b23bfdefSJunchao Zhang PetscInt n; /* Number of destination ranks */ 27*fcc7397dSJunchao Zhang PetscInt *offset; /* [n+1] Offsets of indices for each rank. offset[0]=0, offset[i+1]=offset[i]+dx[i]*dy[i]*dz[i] */ 28*fcc7397dSJunchao Zhang PetscInt *start; /* [n] First index */ 29*fcc7397dSJunchao Zhang PetscInt *dx,*dy,*dz; /* [n] Lengths of the submatrix in X, Y, Z dimension. */ 30*fcc7397dSJunchao Zhang PetscInt *X,*Y; /* [n] Lengths of the outer matrix in X, Y. We do not care Z. */ 3140e23c03SJunchao Zhang }; 3240e23c03SJunchao Zhang 33eb02082bSJunchao Zhang /* An abstract class that defines a communication link, which includes how to pack/unpack data and send/recv buffers 3440e23c03SJunchao Zhang */ 35*fcc7397dSJunchao Zhang struct _n_PetscSFLink { 36*fcc7397dSJunchao Zhang PetscErrorCode (*h_Pack) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,void*); 37*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndInsert) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 38*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndAdd) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 39*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndMin) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 40*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndMax) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 41*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndMinloc) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 42*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndMaxloc) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 43*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndMult) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 44*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndLAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 45*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndBAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 46*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndLOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 47*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndBOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 48*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndLXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 49*fcc7397dSJunchao Zhang PetscErrorCode (*h_UnpackAndBXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 50*fcc7397dSJunchao Zhang PetscErrorCode (*h_FetchAndAdd) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*, void*); 51*fcc7397dSJunchao Zhang 52*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndInsert)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 53*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndAdd) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 54*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndMin) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 55*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndMax) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 56*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndMinloc)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 57*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndMaxloc)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 58*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndMult) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 59*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndLAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 60*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndBAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 61*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndLOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 62*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndBOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 63*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndLXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 64*fcc7397dSJunchao Zhang PetscErrorCode (*h_ScatterAndBXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 65*fcc7397dSJunchao Zhang 66*fcc7397dSJunchao Zhang PetscErrorCode (*h_FetchAndAddLocal)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,void*); 67cd620004SJunchao Zhang 68cd620004SJunchao Zhang PetscBool deviceinited; /* Are device related fields initialized? */ 69eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 70eb02082bSJunchao Zhang /* These fields are lazily initialized in a sense that only when device pointers are passed to an SF, the SF 71eb02082bSJunchao Zhang will set them, otherwise it just leaves them alone even though PETSC_HAVE_CUDA. Packing routines using 72eb02082bSJunchao Zhang regular ops when there are no data race chances. 73eb02082bSJunchao Zhang */ 74*fcc7397dSJunchao Zhang PetscErrorCode (*d_Pack) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,void*); 75*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndInsert) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 76*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndAdd) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 77*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndMin) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 78*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndMax) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 79*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndMinloc) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 80*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndMaxloc) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 81*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndMult) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 82*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndLAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 83*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndBAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 84*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndLOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 85*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndBOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 86*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndLXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 87*fcc7397dSJunchao Zhang PetscErrorCode (*d_UnpackAndBXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 88*fcc7397dSJunchao Zhang PetscErrorCode (*d_FetchAndAdd) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*, void*); 89eb02082bSJunchao Zhang 90*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndInsert)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 91*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndAdd) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 92*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndMin) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 93*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndMax) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 94*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndMinloc)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 95*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndMaxloc)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 96*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndMult) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 97*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndLAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 98*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndBAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 99*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndLOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 100*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndBOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 101*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndLXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 102*fcc7397dSJunchao Zhang PetscErrorCode (*d_ScatterAndBXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 103*fcc7397dSJunchao Zhang PetscErrorCode (*d_FetchAndAddLocal)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,void*); 104eb02082bSJunchao Zhang 105eb02082bSJunchao Zhang /* Packing routines using atomics when there are data race chances */ 106*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndInsert)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 107*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndAdd) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 108*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndMin) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 109*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndMax) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 110*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndMinloc)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 111*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndMaxloc)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 112*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndMult) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 113*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndLAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 114*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndBAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 115*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndLOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 116*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndBOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 117*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndLXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 118*fcc7397dSJunchao Zhang PetscErrorCode (*da_UnpackAndBXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*); 119*fcc7397dSJunchao Zhang PetscErrorCode (*da_FetchAndAdd) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*, void*); 120cd620004SJunchao Zhang 121*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndInsert)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 122*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndAdd) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 123*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndMin) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 124*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndMax) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 125*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndMinloc)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 126*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndMaxloc)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 127*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndMult) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 128*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndLAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 129*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndBAND) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 130*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndLOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 131*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndBOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 132*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndLXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 133*fcc7397dSJunchao Zhang PetscErrorCode (*da_ScatterAndBXOR) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*); 134*fcc7397dSJunchao Zhang PetscErrorCode (*da_FetchAndAddLocal)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,void*); 135eb02082bSJunchao Zhang 136e315309dSJunchao Zhang PetscInt maxResidentThreadsPerGPU; /* It is a copy from SF for convenience */ 137eb02082bSJunchao Zhang cudaStream_t stream; /* Stream to launch pack/unapck kernels if not using the default stream */ 138eb02082bSJunchao Zhang #endif 139eb02082bSJunchao Zhang PetscMPIInt tag; /* Each link has a tag so we can perform multiple SF ops at the same time */ 140cd620004SJunchao Zhang MPI_Datatype unit; /* The MPI datatype this PetscSFLink is built for */ 141eb02082bSJunchao Zhang MPI_Datatype basicunit; /* unit is made of MPI builtin dataype basicunit */ 142e07844bfSJunchao Zhang PetscBool isbuiltin; /* Is unit an MPI/PETSc builtin datatype? If it is true, then bs=1 and basicunit is equivalent to unit */ 143eb02082bSJunchao Zhang size_t unitbytes; /* Number of bytes in a unit */ 144eb02082bSJunchao Zhang PetscInt bs; /* Number of basic units in a unit */ 145cd620004SJunchao Zhang const void *rootdata,*leafdata; /* rootdata and leafdata the link is working on. They are used as keys for pending links. */ 146cd620004SJunchao Zhang PetscMemType rootmtype,leafmtype; /* root/leafdata's memory type */ 147cd620004SJunchao Zhang 148cd620004SJunchao Zhang /* For local and remote communication */ 149cd620004SJunchao Zhang PetscMemType rootmtype_mpi,leafmtype_mpi; /* Mtypes of buffers passed to MPI. If use_gpu_aware_mpi, they are same as root/leafmtype. Otherwise they are PETSC_MEMTYPE_HOST */ 150cd620004SJunchao Zhang PetscBool rootdirect[2],leafdirect[2]; /* Can root/leafdata be directly passed to SF (i.e., without buffering). In layout of [PETSCSF_LOCAL/REMOTE]. See more in PetscSFLinkCreate() */ 151cd620004SJunchao Zhang PetscInt rootdirect_mpi,leafdirect_mpi;/* Can root/leafdata for remote be directly passed to MPI? 1: yes, 0: no. See more in PetscSFLinkCreate() */ 152cd620004SJunchao Zhang const void *rootdatadirect[2][2]; /* The root/leafdata used to init root/leaf requests, in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE]. */ 153cd620004SJunchao Zhang const void *leafdatadirect[2][2]; /* ... We need them to look up links when root/leafdirect_mpi are true */ 154cd620004SJunchao Zhang char *rootbuf[2][2]; /* Buffers for packed roots, in layout of [PETSCSF_LOCAL/REMOTE][PETSC_MEMTYPE] */ 155cd620004SJunchao Zhang char *rootbuf_alloc[2][2]; /* Log memory allocated by petsc. We need it since rootbuf[][] may point to rootdata given by user */ 156cd620004SJunchao Zhang char *leafbuf[2][2]; /* Buffers for packed leaves, in layout of [PETSCSF_LOCAL/REMOTE][PETSC_MEMTYPE] */ 157cd620004SJunchao Zhang char *leafbuf_alloc[2][2]; 158cd620004SJunchao Zhang MPI_Request *rootreqs[2][2][2]; /* Root requests in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE][rootdirect_mpi] */ 159cd620004SJunchao Zhang MPI_Request *leafreqs[2][2][2]; /* Leaf requests in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE][leafdirect_mpi] */ 160cd620004SJunchao Zhang PetscBool rootreqsinited[2][2][2]; /* Are root requests initialized? Also in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE][rootdirect_mpi]*/ 161cd620004SJunchao Zhang PetscBool leafreqsinited[2][2][2]; /* Are leaf requests initialized? Also in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE][leafdirect_mpi]*/ 162cd620004SJunchao Zhang MPI_Request *reqs; /* An array of length (nrootreqs+nleafreqs)*8. Pointers in rootreqs[][][] and leafreqs[][][] point here */ 163cd620004SJunchao Zhang PetscSFLink next; 16440e23c03SJunchao Zhang }; 16540e23c03SJunchao Zhang 166cd620004SJunchao Zhang #if defined(PETSC_USE_DEBUG) 167cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetErrorOnUnsupportedOverlap(PetscSF,MPI_Datatype,const void*,const void*); 168cd620004SJunchao Zhang #else 169cd620004SJunchao Zhang #define PetscSFSetErrorOnUnsupportedOverlap(a,b,c,d) 0 170cd620004SJunchao Zhang #endif 171b7c0d12aSJunchao Zhang 172cd620004SJunchao Zhang /* Create/setup/retrieve/destroy a link */ 173cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkCreate(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,const void*,MPI_Op,PetscSFOperation,PetscSFLink*); 174cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkSetUp_Host(PetscSF,PetscSFLink,MPI_Datatype); 175cd620004SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 176cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkSetUp_Device(PetscSF,PetscSFLink,MPI_Datatype); 177cd620004SJunchao Zhang #else 178cd620004SJunchao Zhang #define PetscSFLinkSetUp_Device(a,b,c) 0 179cd620004SJunchao Zhang #endif 180cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkGetInUse(PetscSF,MPI_Datatype,const void*,const void*,PetscCopyMode,PetscSFLink*); 181cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkReclaim(PetscSF,PetscSFLink*); 182cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkDestroy(PetscSF,PetscSFLink*); 183cd620004SJunchao Zhang 184cd620004SJunchao Zhang /* Get pack/unpack function pointers from a link */ 185*fcc7397dSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLinkGetPack(PetscSFLink link,PetscMemType mtype,PetscErrorCode (**Pack)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,void*)) 186eb02082bSJunchao Zhang { 187eb02082bSJunchao Zhang PetscFunctionBegin; 188eb02082bSJunchao Zhang if (mtype == PETSC_MEMTYPE_HOST) *Pack = link->h_Pack; 189eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 190cd620004SJunchao Zhang else *Pack = link->d_Pack; 191eb02082bSJunchao Zhang #endif 192eb02082bSJunchao Zhang PetscFunctionReturn(0); 193eb02082bSJunchao Zhang } 194*fcc7397dSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkGetUnpackAndOp(PetscSFLink,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**UnpackAndOp)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,const void*)); 195*fcc7397dSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkGetFetchAndOp (PetscSFLink,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**FetchAndOp) (PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,void*)); 196*fcc7397dSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkGetScatterAndOp(PetscSFLink,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**ScatterAndOp)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,PetscInt,PetscSFPackOpt,const PetscInt*,void*)); 197*fcc7397dSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkGetFetchAndOpLocal(PetscSFLink,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**FetchAndOpLocal)(PetscSFLink,PetscInt,PetscInt,PetscSFPackOpt,const PetscInt*,void*,PetscInt,PetscSFPackOpt,const PetscInt*,const void*,void*)); 198cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkGetMPIBuffersAndRequests(PetscSF,PetscSFLink,PetscSFDirection,void**,void**,MPI_Request**,MPI_Request**); 199b7c0d12aSJunchao Zhang 200cd620004SJunchao Zhang /* Do Pack/Unpack/Fetch/Scatter with the link */ 201cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkPackRootData (PetscSF,PetscSFLink,PetscSFScope,const void*); 202cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkPackLeafData (PetscSF,PetscSFLink,PetscSFScope,const void*); 203cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkUnpackRootData(PetscSF,PetscSFLink,PetscSFScope,void*,MPI_Op); 204cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkUnpackLeafData(PetscSF,PetscSFLink,PetscSFScope,void*,MPI_Op); 205cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkFetchRootData (PetscSF,PetscSFLink,PetscSFScope,void*,MPI_Op); 206cd620004SJunchao Zhang 207cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkBcastAndOpLocal(PetscSF,PetscSFLink,const void*,void*,MPI_Op); 208cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkReduceLocal(PetscSF,PetscSFLink,const void*,void*,MPI_Op); 209cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFLinkFetchAndOpLocal(PetscSF,PetscSFLink,void*,const void*,void*,MPI_Op); 210cd620004SJunchao Zhang 211cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUpPackFields(PetscSF sf); 212cd620004SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFResetPackFields(PetscSF sf); 213cd620004SJunchao Zhang 214cd620004SJunchao Zhang /* Get root indices used for pack/unpack 215cd620004SJunchao Zhang 216cd620004SJunchao Zhang Input arguments: 217cd620004SJunchao Zhang +sf - StarForest 218cd620004SJunchao Zhang .link - The link, which provides the stream for the async memcpy (In SF, we make all GPU operations asynchronous to avoid unexpected pipeline stalls) 219cd620004SJunchao Zhang .scope - Which part of the indices? (PETSCSF_LOCAL or PETSCSF_REMOTE) 220cd620004SJunchao Zhang .mtype - In what type of memory? (PETSC_MEMTYPE_DEVICE or PETSC_MEMTYPE_HOST) 221cd620004SJunchao Zhang 222cd620004SJunchao Zhang Output arguments: 223cd620004SJunchao Zhang +count - Count of indices 224cd620004SJunchao Zhang .start - The first index (only useful when indices is NULL) 225cd620004SJunchao Zhang -indices - indices of roots for pack/unpack. NULL means indices are contiguous 226cd620004SJunchao Zhang */ 227*fcc7397dSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLinkGetRootPackOptAndIndices(PetscSF sf,PetscSFLink link,PetscMemType mtype,PetscSFScope scope,PetscInt *count,PetscInt *start,PetscSFPackOpt *opt,const PetscInt **indices) 228b7c0d12aSJunchao Zhang { 229cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 230cd620004SJunchao Zhang PetscInt offset; 231b7c0d12aSJunchao Zhang 232b7c0d12aSJunchao Zhang PetscFunctionBegin; 233*fcc7397dSJunchao Zhang *count = bas->rootbuflen[scope]; 234*fcc7397dSJunchao Zhang *start = bas->rootstart[scope]; 235*fcc7397dSJunchao Zhang *opt = NULL; 236*fcc7397dSJunchao Zhang *indices = NULL; 237*fcc7397dSJunchao Zhang 238*fcc7397dSJunchao Zhang /* We have these rules: 239*fcc7397dSJunchao Zhang 1) opt == NULL && indices == NULL ==> indices are contiguous. 240*fcc7397dSJunchao Zhang 2) opt != NULL ==> indices are in 3D but not contiguous. On host, indices != NULL since indices are already available and we do not 241*fcc7397dSJunchao Zhang want to enforce all operations to use opt; but on device, indices = NULL since we do not want to copy indices to device. 242*fcc7397dSJunchao Zhang */ 243*fcc7397dSJunchao Zhang if (!bas->rootcontig[scope]) { 244cd620004SJunchao Zhang offset = (scope == PETSCSF_LOCAL)? 0 : bas->ioffset[bas->ndiranks]; 245*fcc7397dSJunchao Zhang if (mtype == PETSC_MEMTYPE_HOST) {*opt = bas->rootpackopt[scope]; *indices = bas->irootloc + offset;} 246cd620004SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 247cd620004SJunchao Zhang else { 248*fcc7397dSJunchao Zhang PetscErrorCode ierr; 249cd620004SJunchao Zhang cudaError_t cerr; 250*fcc7397dSJunchao Zhang size_t size; 251*fcc7397dSJunchao Zhang if (bas->rootpackopt[scope]) { 252*fcc7397dSJunchao Zhang if (!bas->rootpackopt_d[scope]) { 253*fcc7397dSJunchao Zhang ierr = PetscMalloc1(1,&bas->rootpackopt_d[scope]);CHKERRQ(ierr); 254*fcc7397dSJunchao Zhang ierr = PetscArraycpy(bas->rootpackopt_d[scope],bas->rootpackopt[scope],1);CHKERRQ(ierr); /* Make pointers in bas->rootpackopt_d[] still work on host */ 255*fcc7397dSJunchao Zhang size = (bas->rootpackopt[scope]->n*7+2)*sizeof(PetscInt); /* See comments at struct _n_PetscSFPackOpt*/ 256*fcc7397dSJunchao Zhang cerr = cudaMalloc((void **)&bas->rootpackopt_d[scope]->array,size);CHKERRCUDA(cerr); 257*fcc7397dSJunchao Zhang cerr = cudaMemcpyAsync(bas->rootpackopt_d[scope]->array,bas->rootpackopt[scope]->array,size,cudaMemcpyHostToDevice,link->stream);CHKERRCUDA(cerr); 258*fcc7397dSJunchao Zhang } 259*fcc7397dSJunchao Zhang *opt = bas->rootpackopt_d[scope]; 260*fcc7397dSJunchao Zhang } else { /* On device, we only provide indices when there is no optimization. We're reluctant to copy indices to device. */ 261*fcc7397dSJunchao Zhang if (!bas->irootloc_d[scope]) { 262*fcc7397dSJunchao Zhang size = bas->rootbuflen[scope]*sizeof(PetscInt); 263cd620004SJunchao Zhang cerr = cudaMalloc((void **)&bas->irootloc_d[scope],size);CHKERRCUDA(cerr); 264cd620004SJunchao Zhang cerr = cudaMemcpyAsync(bas->irootloc_d[scope],bas->irootloc+offset,size,cudaMemcpyHostToDevice,link->stream);CHKERRCUDA(cerr); 265b7c0d12aSJunchao Zhang } 266cd620004SJunchao Zhang *indices = bas->irootloc_d[scope]; 267cd620004SJunchao Zhang } 268cd620004SJunchao Zhang } 269*fcc7397dSJunchao Zhang #endif 270cd620004SJunchao Zhang } 271b7c0d12aSJunchao Zhang PetscFunctionReturn(0); 272b7c0d12aSJunchao Zhang } 273b7c0d12aSJunchao Zhang 274cd620004SJunchao Zhang /* Get leaf indices used for pack/unpack 275cd620004SJunchao Zhang 276*fcc7397dSJunchao Zhang See also PetscSFLinkGetRootPackOptAndIndices() 277cd620004SJunchao Zhang */ 278*fcc7397dSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLinkGetLeafPackOptAndIndices(PetscSF sf,PetscSFLink link,PetscMemType mtype,PetscSFScope scope,PetscInt *count,PetscInt *start,PetscSFPackOpt *opt,const PetscInt **indices) 279cd620004SJunchao Zhang { 280cd620004SJunchao Zhang PetscInt offset; 281cd620004SJunchao Zhang 282cd620004SJunchao Zhang PetscFunctionBegin; 283*fcc7397dSJunchao Zhang *count = sf->leafbuflen[scope]; 284*fcc7397dSJunchao Zhang *start = sf->leafstart[scope]; 285*fcc7397dSJunchao Zhang *opt = NULL; 286*fcc7397dSJunchao Zhang *indices = NULL; 287*fcc7397dSJunchao Zhang if (!sf->leafcontig[scope]) { 288cd620004SJunchao Zhang offset = (scope == PETSCSF_LOCAL)? 0 : sf->roffset[sf->ndranks]; 289*fcc7397dSJunchao Zhang if (mtype == PETSC_MEMTYPE_HOST) {*opt = sf->leafpackopt[scope]; *indices = sf->rmine + offset;} 290eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 291cd620004SJunchao Zhang else { 292*fcc7397dSJunchao Zhang PetscErrorCode ierr; 293cd620004SJunchao Zhang cudaError_t cerr; 294*fcc7397dSJunchao Zhang size_t size; 295*fcc7397dSJunchao Zhang if (sf->leafpackopt[scope]) { 296*fcc7397dSJunchao Zhang if (!sf->leafpackopt_d[scope]) { 297*fcc7397dSJunchao Zhang ierr = PetscMalloc1(1,&sf->leafpackopt_d[scope]);CHKERRQ(ierr); 298*fcc7397dSJunchao Zhang ierr = PetscArraycpy(sf->leafpackopt_d[scope],sf->leafpackopt[scope],1);CHKERRQ(ierr); 299*fcc7397dSJunchao Zhang size = (sf->leafpackopt[scope]->n*7+2)*sizeof(PetscInt); /* See comments at struct _n_PetscSFPackOpt*/ 300*fcc7397dSJunchao Zhang cerr = cudaMalloc((void **)&sf->leafpackopt_d[scope]->array,size);CHKERRCUDA(cerr); /* Change ->array to a device pointer */ 301*fcc7397dSJunchao Zhang cerr = cudaMemcpyAsync(sf->leafpackopt_d[scope]->array,sf->leafpackopt[scope]->array,size,cudaMemcpyHostToDevice,link->stream);CHKERRCUDA(cerr); 302*fcc7397dSJunchao Zhang } 303*fcc7397dSJunchao Zhang *opt = sf->leafpackopt_d[scope]; 304*fcc7397dSJunchao Zhang } else { 305*fcc7397dSJunchao Zhang if (!sf->rmine_d[scope]) { 306*fcc7397dSJunchao Zhang size = sf->leafbuflen[scope]*sizeof(PetscInt); 307cd620004SJunchao Zhang cerr = cudaMalloc((void **)&sf->rmine_d[scope],size);CHKERRCUDA(cerr); 308cd620004SJunchao Zhang cerr = cudaMemcpyAsync(sf->rmine_d[scope],sf->rmine+offset,size,cudaMemcpyHostToDevice,link->stream);CHKERRCUDA(cerr); 309cd620004SJunchao Zhang } 310cd620004SJunchao Zhang *indices = sf->rmine_d[scope]; 311cd620004SJunchao Zhang } 312cd620004SJunchao Zhang } 313*fcc7397dSJunchao Zhang #endif 314cd620004SJunchao Zhang } 315cd620004SJunchao Zhang PetscFunctionReturn(0); 316cd620004SJunchao Zhang } 317cd620004SJunchao Zhang 318cd620004SJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLinkMPIWaitall(PetscSF sf,PetscSFLink link,PetscSFDirection direction) 319cd620004SJunchao Zhang { 320cd620004SJunchao Zhang PetscErrorCode ierr; 321cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 322cd620004SJunchao Zhang const PetscMemType rootmtype_mpi = link->rootmtype_mpi,leafmtype_mpi = link->leafmtype_mpi; 323cd620004SJunchao Zhang const PetscInt rootdirect_mpi = link->rootdirect_mpi,leafdirect_mpi = link->leafdirect_mpi; 324cd620004SJunchao Zhang 325cd620004SJunchao Zhang PetscFunctionBegin; 326cd620004SJunchao Zhang ierr = MPI_Waitall(bas->nrootreqs,link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi],MPI_STATUSES_IGNORE);CHKERRQ(ierr); 327cd620004SJunchao Zhang ierr = MPI_Waitall(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi],MPI_STATUSES_IGNORE);CHKERRQ(ierr); 328cd620004SJunchao Zhang PetscFunctionReturn(0); 329cd620004SJunchao Zhang } 330cd620004SJunchao Zhang 331f01131f0SJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLinkMemcpy(PetscSF sf,PetscSFLink link,PetscMemType dstmtype,void* dst,PetscMemType srcmtype,const void*src,size_t n) 332f01131f0SJunchao Zhang { 333f01131f0SJunchao Zhang PetscFunctionBegin; 334f01131f0SJunchao Zhang if (n) { 335f01131f0SJunchao Zhang if (dstmtype == PETSC_MEMTYPE_HOST && srcmtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscMemcpy(dst,src,n);CHKERRQ(ierr);} 336f01131f0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 337f01131f0SJunchao Zhang else if (dstmtype == PETSC_MEMTYPE_DEVICE && srcmtype == PETSC_MEMTYPE_HOST) { 338f01131f0SJunchao Zhang cudaError_t err = cudaMemcpyAsync(dst,src,n,cudaMemcpyHostToDevice,link->stream);CHKERRCUDA(err); 339f01131f0SJunchao Zhang PetscErrorCode ierr = PetscLogCpuToGpu(n);CHKERRQ(ierr); 340f01131f0SJunchao Zhang } else if (dstmtype == PETSC_MEMTYPE_HOST && srcmtype == PETSC_MEMTYPE_DEVICE) { 341f01131f0SJunchao Zhang cudaError_t err = cudaMemcpyAsync(dst,src,n,cudaMemcpyDeviceToHost,link->stream);CHKERRCUDA(err); 342f01131f0SJunchao Zhang PetscErrorCode ierr = PetscLogGpuToCpu(n);CHKERRQ(ierr); 343f01131f0SJunchao Zhang } else if (dstmtype == PETSC_MEMTYPE_DEVICE && srcmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMemcpyAsync(dst,src,n,cudaMemcpyDeviceToDevice,link->stream);CHKERRCUDA(err);} 344f01131f0SJunchao Zhang #endif 345f01131f0SJunchao Zhang else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType for dst %d and src %d",(int)dstmtype,(int)srcmtype); 346f01131f0SJunchao Zhang } 347f01131f0SJunchao Zhang PetscFunctionReturn(0); 348f01131f0SJunchao Zhang } 34940e23c03SJunchao Zhang #endif 350