1 static const char help[] = "Test star forest communication (PetscSF)\n\n"; 2 3 /*T 4 Description: A star is a simple tree with one root and zero or more leaves. 5 A star forest is a union of disjoint stars. 6 Many common communication patterns can be expressed as updates of rootdata using leafdata and vice-versa. 7 This example creates a star forest, communicates values using the graph (see options for types of communication), views the graph, then destroys it. 8 T*/ 9 10 /* 11 Include petscsf.h so we can use PetscSF objects. Note that this automatically 12 includes petscsys.h. 13 */ 14 #include <petscsf.h> 15 #include <petscviewer.h> 16 17 /* like PetscSFView() but with alternative array of local indices */ 18 static PetscErrorCode PetscSFViewCustomLocals_Private(PetscSF sf,const PetscInt locals[],PetscViewer viewer) 19 { 20 const PetscSFNode *iremote; 21 PetscInt i,nroots,nleaves,nranks; 22 PetscMPIInt rank; 23 PetscErrorCode ierr; 24 25 PetscFunctionBeginUser; 26 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 27 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr); 28 ierr = PetscSFGetRootRanks(sf,&nranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 29 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 30 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 31 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,nroots,nleaves,nranks);CHKERRQ(ierr); 32 for (i=0; i<nleaves; i++) { 33 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,locals[i],iremote[i].rank,iremote[i].index);CHKERRQ(ierr); 34 } 35 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 36 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 37 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 38 PetscFunctionReturn(0); 39 } 40 41 int main(int argc,char **argv) 42 { 43 PetscErrorCode ierr; 44 PetscInt i,nroots,nrootsalloc,nleaves,nleavesalloc,*mine,stride; 45 PetscSFNode *remote; 46 PetscMPIInt rank,size; 47 PetscSF sf; 48 PetscBool test_all,test_bcast,test_bcastop,test_reduce,test_degree,test_fetchandop,test_gather,test_scatter,test_embed,test_invert,test_sf_distribute,test_char; 49 MPI_Op mop=MPI_OP_NULL; /* initialize to prevent compiler warnings with cxx_quad build */ 50 char opstring[256]; 51 PetscBool strflg; 52 53 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 54 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 55 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 56 57 ierr = PetscOptionsBegin(PETSC_COMM_WORLD,"","PetscSF Test Options","none");CHKERRQ(ierr); 58 test_all = PETSC_FALSE; 59 ierr = PetscOptionsBool("-test_all","Test all SF communications","",test_all,&test_all,NULL);CHKERRQ(ierr); 60 test_bcast = test_all; 61 ierr = PetscOptionsBool("-test_bcast","Test broadcast","",test_bcast,&test_bcast,NULL);CHKERRQ(ierr); 62 test_bcastop = test_all; 63 ierr = PetscOptionsBool("-test_bcastop","Test broadcast and reduce","",test_bcastop,&test_bcastop,NULL);CHKERRQ(ierr); 64 test_reduce = test_all; 65 ierr = PetscOptionsBool("-test_reduce","Test reduction","",test_reduce,&test_reduce,NULL);CHKERRQ(ierr); 66 test_char = test_all; 67 ierr = PetscOptionsBool("-test_char","Test signed char, unsigned char, and char","",test_char,&test_char,NULL);CHKERRQ(ierr); 68 mop = MPI_SUM; 69 ierr = PetscStrcpy(opstring,"sum");CHKERRQ(ierr); 70 ierr = PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,256,NULL);CHKERRQ(ierr); 71 ierr = PetscStrcmp("sum",opstring,&strflg);CHKERRQ(ierr); 72 if (strflg) { 73 mop = MPIU_SUM; 74 } 75 ierr = PetscStrcmp("prod",opstring,&strflg);CHKERRQ(ierr); 76 if (strflg) { 77 mop = MPI_PROD; 78 } 79 ierr = PetscStrcmp("max",opstring,&strflg);CHKERRQ(ierr); 80 if (strflg) { 81 mop = MPI_MAX; 82 } 83 ierr = PetscStrcmp("min",opstring,&strflg);CHKERRQ(ierr); 84 if (strflg) { 85 mop = MPI_MIN; 86 } 87 ierr = PetscStrcmp("land",opstring,&strflg);CHKERRQ(ierr); 88 if (strflg) { 89 mop = MPI_LAND; 90 } 91 ierr = PetscStrcmp("band",opstring,&strflg);CHKERRQ(ierr); 92 if (strflg) { 93 mop = MPI_BAND; 94 } 95 ierr = PetscStrcmp("lor",opstring,&strflg);CHKERRQ(ierr); 96 if (strflg) { 97 mop = MPI_LOR; 98 } 99 ierr = PetscStrcmp("bor",opstring,&strflg);CHKERRQ(ierr); 100 if (strflg) { 101 mop = MPI_BOR; 102 } 103 ierr = PetscStrcmp("lxor",opstring,&strflg);CHKERRQ(ierr); 104 if (strflg) { 105 mop = MPI_LXOR; 106 } 107 ierr = PetscStrcmp("bxor",opstring,&strflg);CHKERRQ(ierr); 108 if (strflg) { 109 mop = MPI_BXOR; 110 } 111 test_degree = test_all; 112 ierr = PetscOptionsBool("-test_degree","Test computation of vertex degree","",test_degree,&test_degree,NULL);CHKERRQ(ierr); 113 test_fetchandop = test_all; 114 ierr = PetscOptionsBool("-test_fetchandop","Test atomic Fetch-And-Op","",test_fetchandop,&test_fetchandop,NULL);CHKERRQ(ierr); 115 test_gather = test_all; 116 ierr = PetscOptionsBool("-test_gather","Test point gather","",test_gather,&test_gather,NULL);CHKERRQ(ierr); 117 test_scatter = test_all; 118 ierr = PetscOptionsBool("-test_scatter","Test point scatter","",test_scatter,&test_scatter,NULL);CHKERRQ(ierr); 119 test_embed = test_all; 120 ierr = PetscOptionsBool("-test_embed","Test point embed","",test_embed,&test_embed,NULL);CHKERRQ(ierr); 121 test_invert = test_all; 122 ierr = PetscOptionsBool("-test_invert","Test point invert","",test_invert,&test_invert,NULL);CHKERRQ(ierr); 123 stride = 1; 124 ierr = PetscOptionsInt("-stride","Stride for leaf and root data","",stride,&stride,NULL);CHKERRQ(ierr); 125 test_sf_distribute = PETSC_FALSE; 126 ierr = PetscOptionsBool("-test_sf_distribute","Create an SF that 'distributes' to each process, like an alltoall","",test_sf_distribute,&test_sf_distribute,NULL);CHKERRQ(ierr); 127 ierr = PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,256,NULL);CHKERRQ(ierr); 128 ierr = PetscOptionsEnd();CHKERRQ(ierr); 129 130 if (test_sf_distribute) { 131 nroots = size; 132 nrootsalloc = size; 133 nleaves = size; 134 nleavesalloc = size; 135 mine = NULL; 136 ierr = PetscMalloc1(nleaves,&remote);CHKERRQ(ierr); 137 for (i=0; i<size; i++) { 138 remote[i].rank = i; 139 remote[i].index = rank; 140 } 141 } else { 142 nroots = 2 + (PetscInt)(rank == 0); 143 nrootsalloc = nroots * stride; 144 nleaves = 2 + (PetscInt)(rank > 0); 145 nleavesalloc = nleaves * stride; 146 mine = NULL; 147 if (stride > 1) { 148 PetscInt i; 149 150 ierr = PetscMalloc1(nleaves,&mine);CHKERRQ(ierr); 151 for (i = 0; i < nleaves; i++) { 152 mine[i] = stride * i; 153 } 154 } 155 ierr = PetscMalloc1(nleaves,&remote);CHKERRQ(ierr); 156 /* Left periodic neighbor */ 157 remote[0].rank = (rank+size-1)%size; 158 remote[0].index = 1 * stride; 159 /* Right periodic neighbor */ 160 remote[1].rank = (rank+1)%size; 161 remote[1].index = 0 * stride; 162 if (rank > 0) { /* All processes reference rank 0, index 1 */ 163 remote[2].rank = 0; 164 remote[2].index = 2 * stride; 165 } 166 } 167 168 /* Create a star forest for communication. In this example, the leaf space is dense, so we pass NULL. */ 169 ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); 170 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 171 ierr = PetscSFSetGraph(sf,nrootsalloc,nleaves,mine,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 172 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 173 174 /* View graph, mostly useful for debugging purposes. */ 175 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr); 176 ierr = PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 177 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 178 179 if (test_bcast) { /* broadcast rootdata into leafdata */ 180 PetscInt *rootdata,*leafdata; 181 /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including 182 * user-defined structures, could also be used. */ 183 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 184 /* Set rootdata buffer to be broadcast */ 185 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 186 for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i; 187 /* Initialize local buffer, these values are never used. */ 188 for (i=0; i<nleavesalloc; i++) leafdata[i] = -1; 189 /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */ 190 ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 191 ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 192 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata\n");CHKERRQ(ierr); 193 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 194 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata\n");CHKERRQ(ierr); 195 ierr = PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 196 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 197 } 198 199 if (test_bcast && test_char) { /* Bcast with char */ 200 PetscInt len; 201 char buf[256]; 202 char *rootdata,*leafdata; 203 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 204 /* Set rootdata buffer to be broadcast */ 205 for (i=0; i<nrootsalloc; i++) rootdata[i] = '*'; 206 for (i=0; i<nroots; i++) rootdata[i*stride] = 'A' + rank*3 + i; /* rank is very small, so it is fine to compute a char */ 207 /* Initialize local buffer, these values are never used. */ 208 for (i=0; i<nleavesalloc; i++) leafdata[i] = '?'; 209 210 ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata);CHKERRQ(ierr); 211 ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata);CHKERRQ(ierr); 212 213 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata in type of char\n");CHKERRQ(ierr); 214 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 215 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5c",rootdata[i]);CHKERRQ(ierr); len += 5;} 216 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 217 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 218 219 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata in type of char\n");CHKERRQ(ierr); 220 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 221 for (i=0; i<nleavesalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5c",leafdata[i]);CHKERRQ(ierr); len += 5;} 222 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 223 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 224 225 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 226 } 227 228 if (test_bcastop) { /* Reduce rootdata into leafdata */ 229 PetscInt *rootdata,*leafdata; 230 /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including 231 * user-defined structures, could also be used. */ 232 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 233 /* Set rootdata buffer to be broadcast */ 234 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 235 for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i; 236 /* Set leaf values to reduce with */ 237 for (i=0; i<nleavesalloc; i++) leafdata[i] = -10*(rank+1) - i; 238 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-BcastAndOp Leafdata\n");CHKERRQ(ierr); 239 ierr = PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 240 /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */ 241 ierr = PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,mop);CHKERRQ(ierr); 242 ierr = PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,mop);CHKERRQ(ierr); 243 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Rootdata\n");CHKERRQ(ierr); 244 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 245 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Leafdata\n");CHKERRQ(ierr); 246 ierr = PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 247 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 248 } 249 250 if (test_reduce) { /* Reduce leafdata into rootdata */ 251 PetscInt *rootdata,*leafdata; 252 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 253 /* Initialize rootdata buffer in which the result of the reduction will appear. */ 254 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 255 for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i; 256 /* Set leaf values to reduce. */ 257 for (i=0; i<nleavesalloc; i++) leafdata[i] = -1; 258 for (i=0; i<nleaves; i++) leafdata[i*stride] = 1000*(rank+1) + 10*i; 259 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata\n");CHKERRQ(ierr); 260 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 261 /* Perform reduction. Computation or other communication can be performed between the begin and end calls. 262 * This example sums the values, but other MPI_Ops can be used (e.g MPI_MAX, MPI_PROD). */ 263 ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,mop);CHKERRQ(ierr); 264 ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,mop);CHKERRQ(ierr); 265 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata\n");CHKERRQ(ierr); 266 ierr = PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 267 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata\n");CHKERRQ(ierr); 268 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 269 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 270 } 271 272 if (test_reduce && test_char) { /* Reduce with signed char */ 273 PetscInt len; 274 char buf[256]; 275 signed char *rootdata,*leafdata; 276 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 277 /* Initialize rootdata buffer in which the result of the reduction will appear. */ 278 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 279 for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i; 280 /* Set leaf values to reduce. */ 281 for (i=0; i<nleavesalloc; i++) leafdata[i] = -1; 282 for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i; 283 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of signed char\n");CHKERRQ(ierr); 284 285 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 286 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]);CHKERRQ(ierr); len += 5;} 287 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 288 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 289 290 /* Using MPI_CHAR should trigger an error since MPI standard does not support reduction on MPI_CHAR. 291 Testing with -test_op max, one can see the sign does take effect in MPI_MAX. 292 */ 293 ierr = PetscSFReduceBegin(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);CHKERRQ(ierr); 294 ierr = PetscSFReduceEnd(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);CHKERRQ(ierr); 295 296 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of signed char\n");CHKERRQ(ierr); 297 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 298 for (i=0; i<nleavesalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5d",leafdata[i]);CHKERRQ(ierr); len += 5;} 299 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 300 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 301 302 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of signed char\n");CHKERRQ(ierr); 303 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 304 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]);CHKERRQ(ierr); len += 5;} 305 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 306 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 307 308 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 309 } 310 311 if (test_reduce && test_char) { /* Reduce with unsigned char */ 312 PetscInt len; 313 char buf[256]; 314 unsigned char *rootdata,*leafdata; 315 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 316 /* Initialize rootdata buffer in which the result of the reduction will appear. */ 317 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 318 for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i; 319 /* Set leaf values to reduce. */ 320 for (i=0; i<nleavesalloc; i++) leafdata[i] = -1; 321 for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i; 322 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of unsigned char\n");CHKERRQ(ierr); 323 324 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 325 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]);CHKERRQ(ierr); len += 5;} 326 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 327 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 328 329 /* Using MPI_CHAR should trigger an error since MPI standard does not support reduction on MPI_CHAR. 330 Testing with -test_op max, one can see the sign does take effect in MPI_MAX. 331 */ 332 ierr = PetscSFReduceBegin(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);CHKERRQ(ierr); 334 335 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of unsigned char\n");CHKERRQ(ierr); 336 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 337 for (i=0; i<nleavesalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5u",leafdata[i]);CHKERRQ(ierr); len += 5;} 338 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 339 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 340 341 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of unsigned char\n");CHKERRQ(ierr); 342 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 343 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]);CHKERRQ(ierr); len += 5;} 344 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 345 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 346 347 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 348 } 349 350 if (test_degree) { 351 const PetscInt *degree; 352 ierr = PetscSFComputeDegreeBegin(sf,°ree);CHKERRQ(ierr); 353 ierr = PetscSFComputeDegreeEnd(sf,°ree);CHKERRQ(ierr); 354 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Root degrees\n");CHKERRQ(ierr); 355 ierr = PetscIntView(nrootsalloc,degree,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 356 } 357 358 if (test_fetchandop) { 359 /* Cannot use text compare here because token ordering is not deterministic */ 360 PetscInt *leafdata,*leafupdate,*rootdata; 361 ierr = PetscMalloc3(nleavesalloc,&leafdata,nleavesalloc,&leafupdate,nrootsalloc,&rootdata);CHKERRQ(ierr); 362 for (i=0; i<nleavesalloc; i++) leafdata[i] = -1; 363 for (i=0; i<nleaves; i++) leafdata[i*stride] = 1; 364 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 365 for (i=0; i<nroots; i++) rootdata[i*stride] = 0; 366 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);CHKERRQ(ierr); 367 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);CHKERRQ(ierr); 368 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Rootdata (sum of 1 from each leaf)\n");CHKERRQ(ierr); 369 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 370 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Leafupdate (value at roots prior to my atomic update)\n");CHKERRQ(ierr); 371 ierr = PetscIntView(nleavesalloc,leafupdate,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 372 ierr = PetscFree3(leafdata,leafupdate,rootdata);CHKERRQ(ierr); 373 } 374 375 if (test_gather) { 376 const PetscInt *degree; 377 PetscInt inedges,*indata,*outdata; 378 ierr = PetscSFComputeDegreeBegin(sf,°ree);CHKERRQ(ierr); 379 ierr = PetscSFComputeDegreeEnd(sf,°ree);CHKERRQ(ierr); 380 for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i]; 381 ierr = PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);CHKERRQ(ierr); 382 for (i=0; i<nleavesalloc; i++) outdata[i] = -1; 383 for (i=0; i<nleaves; i++) outdata[i*stride] = 1000*(rank+1) + i; 384 ierr = PetscSFGatherBegin(sf,MPIU_INT,outdata,indata);CHKERRQ(ierr); 385 ierr = PetscSFGatherEnd(sf,MPIU_INT,outdata,indata);CHKERRQ(ierr); 386 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Gathered data at multi-roots from leaves\n");CHKERRQ(ierr); 387 ierr = PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 388 ierr = PetscFree2(indata,outdata);CHKERRQ(ierr); 389 } 390 391 if (test_scatter) { 392 const PetscInt *degree; 393 PetscInt j,count,inedges,*indata,*outdata; 394 ierr = PetscSFComputeDegreeBegin(sf,°ree);CHKERRQ(ierr); 395 ierr = PetscSFComputeDegreeEnd(sf,°ree);CHKERRQ(ierr); 396 for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i]; 397 ierr = PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);CHKERRQ(ierr); 398 for (i=0; i<nleavesalloc; i++) outdata[i] = -1; 399 for (i=0,count=0; i<nrootsalloc; i++) { 400 for (j=0; j<degree[i]; j++) indata[count++] = 1000*(rank+1) + 100*i + j; 401 } 402 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Data at multi-roots, to scatter to leaves\n");CHKERRQ(ierr); 403 ierr = PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 404 405 ierr = PetscSFScatterBegin(sf,MPIU_INT,indata,outdata);CHKERRQ(ierr); 406 ierr = PetscSFScatterEnd(sf,MPIU_INT,indata,outdata);CHKERRQ(ierr); 407 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Scattered data at leaves\n");CHKERRQ(ierr); 408 ierr = PetscIntView(nleavesalloc,outdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 409 ierr = PetscFree2(indata,outdata);CHKERRQ(ierr); 410 } 411 412 if (test_embed) { 413 const PetscInt nroots = 1 + (PetscInt) !rank; 414 PetscInt selected[2]; 415 PetscSF esf; 416 417 selected[0] = stride; 418 selected[1] = 2*stride; 419 ierr = PetscSFCreateEmbeddedSF(sf,nroots,selected,&esf);CHKERRQ(ierr); 420 ierr = PetscSFSetUp(esf);CHKERRQ(ierr); 421 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Embedded PetscSF\n");CHKERRQ(ierr); 422 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr); 423 ierr = PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 424 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 425 ierr = PetscSFDestroy(&esf);CHKERRQ(ierr); 426 } 427 428 if (test_invert) { 429 const PetscInt *degree; 430 PetscInt *mRootsOrigNumbering; 431 PetscInt inedges; 432 PetscSF msf,imsf; 433 434 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 435 ierr = PetscSFCreateInverseSF(msf,&imsf);CHKERRQ(ierr); 436 ierr = PetscSFSetUp(msf);CHKERRQ(ierr); 437 ierr = PetscSFSetUp(imsf);CHKERRQ(ierr); 438 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF\n");CHKERRQ(ierr); 439 ierr = PetscSFView(msf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 440 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF roots indices in original SF roots numbering\n");CHKERRQ(ierr); 441 ierr = PetscSFComputeDegreeBegin(sf,°ree);CHKERRQ(ierr); 442 ierr = PetscSFComputeDegreeEnd(sf,°ree);CHKERRQ(ierr); 443 ierr = PetscSFComputeMultiRootOriginalNumbering(sf,degree,&inedges,&mRootsOrigNumbering);CHKERRQ(ierr); 444 ierr = PetscIntView(inedges,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 445 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF\n");CHKERRQ(ierr); 446 ierr = PetscSFView(imsf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 447 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF, original numbering\n");CHKERRQ(ierr); 448 ierr = PetscSFViewCustomLocals_Private(imsf,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 449 ierr = PetscSFDestroy(&imsf);CHKERRQ(ierr); 450 ierr = PetscFree(mRootsOrigNumbering);CHKERRQ(ierr); 451 } 452 453 /* Clean storage for star forest. */ 454 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 455 ierr = PetscFinalize(); 456 return ierr; 457 } 458 459 /*TEST 460 461 test: 462 nsize: 4 463 filter: grep -v "type" | grep -v "sort" 464 args: -test_bcast -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 465 requires: define(PETSC_HAVE_MPI_ONE_SIDED) 466 467 468 test: 469 suffix: 2 470 nsize: 4 471 filter: grep -v "type" | grep -v "sort" 472 args: -test_reduce -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 473 requires: define(PETSC_HAVE_MPI_ONE_SIDED) 474 475 test: 476 suffix: 2_basic 477 nsize: 4 478 args: -test_reduce -sf_type basic 479 480 test: 481 suffix: 3 482 nsize: 4 483 filter: grep -v "type" | grep -v "sort" 484 args: -test_degree -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 485 requires: define(PETSC_HAVE_MPI_ONE_SIDED) 486 487 test: 488 suffix: 3_basic 489 nsize: 4 490 args: -test_degree -sf_type basic 491 492 test: 493 suffix: 4 494 nsize: 4 495 filter: grep -v "type" | grep -v "sort" 496 args: -test_gather -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 497 requires: define(PETSC_HAVE_MPI_ONE_SIDED) 498 499 test: 500 suffix: 4_basic 501 nsize: 4 502 args: -test_gather -sf_type basic 503 504 test: 505 suffix: 4_stride 506 nsize: 4 507 args: -test_gather -sf_type basic -stride 2 508 509 test: 510 suffix: 5 511 nsize: 4 512 filter: grep -v "type" | grep -v "sort" 513 args: -test_scatter -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 514 requires: define(PETSC_HAVE_MPI_ONE_SIDED) 515 516 test: 517 suffix: 5_basic 518 nsize: 4 519 args: -test_scatter -sf_type basic 520 521 test: 522 suffix: 5_stride 523 nsize: 4 524 args: -test_scatter -sf_type basic -stride 2 525 526 test: 527 suffix: 6 528 nsize: 4 529 filter: grep -v "type" | grep -v "sort" 530 # No -sf_window_flavor dynamic due to bug https://gitlab.com/petsc/petsc/issues/555 531 args: -test_embed -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}} 532 requires: define(PETSC_HAVE_MPI_ONE_SIDED) 533 534 test: 535 suffix: 6_basic 536 nsize: 4 537 args: -test_embed -sf_type basic 538 539 test: 540 suffix: 7 541 nsize: 4 542 filter: grep -v "type" | grep -v "sort" 543 args: -test_invert -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 544 requires: define(PETSC_HAVE_MPI_ONE_SIDED) 545 546 test: 547 suffix: 7_basic 548 nsize: 4 549 args: -test_invert -sf_type basic 550 551 test: 552 suffix: basic 553 nsize: 4 554 args: -test_bcast -sf_type basic 555 output_file: output/ex1_1_basic.out 556 557 test: 558 suffix: bcastop_basic 559 nsize: 4 560 args: -test_bcastop -sf_type basic 561 output_file: output/ex1_bcastop_basic.out 562 563 test: 564 suffix: 8 565 nsize: 3 566 filter: grep -v "type" | grep -v "sort" 567 args: -test_bcast -test_sf_distribute -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 568 requires: define(PETSC_HAVE_MPI_ONE_SIDED) 569 570 test: 571 suffix: 8_basic 572 nsize: 3 573 args: -test_bcast -test_sf_distribute -sf_type basic 574 575 test: 576 suffix: 9_char 577 nsize: 4 578 args: -sf_type basic -test_bcast -test_reduce -test_op max -test_char 579 580 # Here we do not test -sf_window_flavor dynamic since it is designed for repeated SFs with few different rootdata pointers 581 test: 582 suffix: 10 583 filter: grep -v "type" | grep -v "sort" 584 nsize: 4 585 args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}} -test_all -test_bcastop 0 -test_fetchandop 0 586 requires: define(PETSC_HAVE_MPI_ONE_SIDED) 587 588 # The nightly test suite with MPICH uses ch3:sock, which is broken when winsize == 0 in some of the processes 589 test: 590 suffix: 10_shared 591 output_file: output/ex1_10.out 592 filter: grep -v "type" | grep -v "sort" 593 nsize: 4 594 args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor shared -test_all -test_bcastop 0 -test_fetchandop 0 595 requires: define(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) !define(PETSC_HAVE_MPICH_NUMVERSION) define(PETSC_HAVE_MPI_ONE_SIDED) 596 597 test: 598 suffix: 10_basic 599 nsize: 4 600 args: -sf_type basic -test_all -test_bcastop 0 -test_fetchandop 0 601 602 TEST*/ 603