1 static const char help[] = "Test star forest communication (PetscSF)\n\n"; 2 3 /*T 4 Description: A star is a simple tree with one root and zero or more leaves. 5 A star forest is a union of disjoint stars. 6 Many common communication patterns can be expressed as updates of rootdata using leafdata and vice-versa. 7 This example creates a star forest, communicates values using the graph (see options for types of communication), views the graph, then destroys it. 8 T*/ 9 10 /* 11 Include petscsf.h so we can use PetscSF objects. Note that this automatically 12 includes petscsys.h. 13 */ 14 #include <petscsf.h> 15 #include <petscviewer.h> 16 17 /* like PetscSFView() but with alternative array of local indices */ 18 static PetscErrorCode PetscSFViewCustomLocals_Private(PetscSF sf,const PetscInt locals[],PetscViewer viewer) 19 { 20 const PetscSFNode *iremote; 21 PetscInt i,nroots,nleaves,nranks; 22 PetscMPIInt rank; 23 PetscErrorCode ierr; 24 25 PetscFunctionBeginUser; 26 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 27 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr); 28 ierr = PetscSFGetRootRanks(sf,&nranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 29 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 30 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 31 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,nroots,nleaves,nranks);CHKERRQ(ierr); 32 for (i=0; i<nleaves; i++) { 33 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,locals[i],iremote[i].rank,iremote[i].index);CHKERRQ(ierr); 34 } 35 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 36 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 37 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 38 PetscFunctionReturn(0); 39 } 40 41 int main(int argc,char **argv) 42 { 43 PetscErrorCode ierr; 44 PetscInt i,nroots,nrootsalloc,nleaves,nleavesalloc,*mine,stride; 45 PetscSFNode *remote; 46 PetscMPIInt rank,size; 47 PetscSF sf; 48 PetscBool test_all,test_bcast,test_bcastop,test_reduce,test_degree,test_fetchandop,test_gather,test_scatter,test_embed,test_invert,test_sf_distribute,test_char; 49 MPI_Op mop=MPI_OP_NULL; /* initialize to prevent compiler warnings with cxx_quad build */ 50 char opstring[256]; 51 PetscBool strflg; 52 53 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 54 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr); 55 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr); 56 57 ierr = PetscOptionsBegin(PETSC_COMM_WORLD,"","PetscSF Test Options","none");CHKERRQ(ierr); 58 test_all = PETSC_FALSE; 59 ierr = PetscOptionsBool("-test_all","Test all SF communications","",test_all,&test_all,NULL);CHKERRQ(ierr); 60 test_bcast = test_all; 61 ierr = PetscOptionsBool("-test_bcast","Test broadcast","",test_bcast,&test_bcast,NULL);CHKERRQ(ierr); 62 test_bcastop = test_all; 63 ierr = PetscOptionsBool("-test_bcastop","Test broadcast and reduce","",test_bcastop,&test_bcastop,NULL);CHKERRQ(ierr); 64 test_reduce = test_all; 65 ierr = PetscOptionsBool("-test_reduce","Test reduction","",test_reduce,&test_reduce,NULL);CHKERRQ(ierr); 66 test_char = test_all; 67 ierr = PetscOptionsBool("-test_char","Test signed char, unsigned char, and char","",test_char,&test_char,NULL);CHKERRQ(ierr); 68 mop = MPI_SUM; 69 ierr = PetscStrcpy(opstring,"sum");CHKERRQ(ierr); 70 ierr = PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,sizeof(opstring),NULL);CHKERRQ(ierr); 71 ierr = PetscStrcmp("sum",opstring,&strflg);CHKERRQ(ierr); 72 if (strflg) { 73 mop = MPIU_SUM; 74 } 75 ierr = PetscStrcmp("prod",opstring,&strflg);CHKERRQ(ierr); 76 if (strflg) { 77 mop = MPI_PROD; 78 } 79 ierr = PetscStrcmp("max",opstring,&strflg);CHKERRQ(ierr); 80 if (strflg) { 81 mop = MPI_MAX; 82 } 83 ierr = PetscStrcmp("min",opstring,&strflg);CHKERRQ(ierr); 84 if (strflg) { 85 mop = MPI_MIN; 86 } 87 ierr = PetscStrcmp("land",opstring,&strflg);CHKERRQ(ierr); 88 if (strflg) { 89 mop = MPI_LAND; 90 } 91 ierr = PetscStrcmp("band",opstring,&strflg);CHKERRQ(ierr); 92 if (strflg) { 93 mop = MPI_BAND; 94 } 95 ierr = PetscStrcmp("lor",opstring,&strflg);CHKERRQ(ierr); 96 if (strflg) { 97 mop = MPI_LOR; 98 } 99 ierr = PetscStrcmp("bor",opstring,&strflg);CHKERRQ(ierr); 100 if (strflg) { 101 mop = MPI_BOR; 102 } 103 ierr = PetscStrcmp("lxor",opstring,&strflg);CHKERRQ(ierr); 104 if (strflg) { 105 mop = MPI_LXOR; 106 } 107 ierr = PetscStrcmp("bxor",opstring,&strflg);CHKERRQ(ierr); 108 if (strflg) { 109 mop = MPI_BXOR; 110 } 111 test_degree = test_all; 112 ierr = PetscOptionsBool("-test_degree","Test computation of vertex degree","",test_degree,&test_degree,NULL);CHKERRQ(ierr); 113 test_fetchandop = test_all; 114 ierr = PetscOptionsBool("-test_fetchandop","Test atomic Fetch-And-Op","",test_fetchandop,&test_fetchandop,NULL);CHKERRQ(ierr); 115 test_gather = test_all; 116 ierr = PetscOptionsBool("-test_gather","Test point gather","",test_gather,&test_gather,NULL);CHKERRQ(ierr); 117 test_scatter = test_all; 118 ierr = PetscOptionsBool("-test_scatter","Test point scatter","",test_scatter,&test_scatter,NULL);CHKERRQ(ierr); 119 test_embed = test_all; 120 ierr = PetscOptionsBool("-test_embed","Test point embed","",test_embed,&test_embed,NULL);CHKERRQ(ierr); 121 test_invert = test_all; 122 ierr = PetscOptionsBool("-test_invert","Test point invert","",test_invert,&test_invert,NULL);CHKERRQ(ierr); 123 stride = 1; 124 ierr = PetscOptionsInt("-stride","Stride for leaf and root data","",stride,&stride,NULL);CHKERRQ(ierr); 125 test_sf_distribute = PETSC_FALSE; 126 ierr = PetscOptionsBool("-test_sf_distribute","Create an SF that 'distributes' to each process, like an alltoall","",test_sf_distribute,&test_sf_distribute,NULL);CHKERRQ(ierr); 127 ierr = PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,sizeof(opstring),NULL);CHKERRQ(ierr); 128 ierr = PetscOptionsEnd();CHKERRQ(ierr); 129 130 if (test_sf_distribute) { 131 nroots = size; 132 nrootsalloc = size; 133 nleaves = size; 134 nleavesalloc = size; 135 mine = NULL; 136 ierr = PetscMalloc1(nleaves,&remote);CHKERRQ(ierr); 137 for (i=0; i<size; i++) { 138 remote[i].rank = i; 139 remote[i].index = rank; 140 } 141 } else { 142 nroots = 2 + (PetscInt)(rank == 0); 143 nrootsalloc = nroots * stride; 144 nleaves = 2 + (PetscInt)(rank > 0); 145 nleavesalloc = nleaves * stride; 146 mine = NULL; 147 if (stride > 1) { 148 PetscInt i; 149 150 ierr = PetscMalloc1(nleaves,&mine);CHKERRQ(ierr); 151 for (i = 0; i < nleaves; i++) { 152 mine[i] = stride * i; 153 } 154 } 155 ierr = PetscMalloc1(nleaves,&remote);CHKERRQ(ierr); 156 /* Left periodic neighbor */ 157 remote[0].rank = (rank+size-1)%size; 158 remote[0].index = 1 * stride; 159 /* Right periodic neighbor */ 160 remote[1].rank = (rank+1)%size; 161 remote[1].index = 0 * stride; 162 if (rank > 0) { /* All processes reference rank 0, index 1 */ 163 remote[2].rank = 0; 164 remote[2].index = 2 * stride; 165 } 166 } 167 168 /* Create a star forest for communication. In this example, the leaf space is dense, so we pass NULL. */ 169 ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); 170 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 171 ierr = PetscSFSetGraph(sf,nrootsalloc,nleaves,mine,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 172 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 173 174 /* View graph, mostly useful for debugging purposes. */ 175 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr); 176 ierr = PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 177 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 178 179 if (test_bcast) { /* broadcast rootdata into leafdata */ 180 PetscInt *rootdata,*leafdata; 181 /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including 182 * user-defined structures, could also be used. */ 183 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 184 /* Set rootdata buffer to be broadcast */ 185 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 186 for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i; 187 /* Initialize local buffer, these values are never used. */ 188 for (i=0; i<nleavesalloc; i++) leafdata[i] = -1; 189 /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */ 190 ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 191 ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 192 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata\n");CHKERRQ(ierr); 193 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 194 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata\n");CHKERRQ(ierr); 195 ierr = PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 196 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 197 } 198 199 if (test_bcast && test_char) { /* Bcast with char */ 200 PetscInt len; 201 char buf[256]; 202 char *rootdata,*leafdata; 203 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 204 /* Set rootdata buffer to be broadcast */ 205 for (i=0; i<nrootsalloc; i++) rootdata[i] = '*'; 206 for (i=0; i<nroots; i++) rootdata[i*stride] = 'A' + rank*3 + i; /* rank is very small, so it is fine to compute a char */ 207 /* Initialize local buffer, these values are never used. */ 208 for (i=0; i<nleavesalloc; i++) leafdata[i] = '?'; 209 210 ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 211 ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 212 213 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata in type of char\n");CHKERRQ(ierr); 214 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 215 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5c",rootdata[i]);CHKERRQ(ierr); len += 5;} 216 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 217 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 218 219 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata in type of char\n");CHKERRQ(ierr); 220 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 221 for (i=0; i<nleavesalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5c",leafdata[i]);CHKERRQ(ierr); len += 5;} 222 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 223 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 224 225 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 226 } 227 228 if (test_bcastop) { /* Reduce rootdata into leafdata */ 229 PetscInt *rootdata,*leafdata; 230 /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including 231 * user-defined structures, could also be used. */ 232 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 233 /* Set rootdata buffer to be broadcast */ 234 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 235 for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i; 236 /* Set leaf values to reduce with */ 237 for (i=0; i<nleavesalloc; i++) leafdata[i] = -10*(rank+1) - i; 238 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-BcastAndOp Leafdata\n");CHKERRQ(ierr); 239 ierr = PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 240 /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */ 241 ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,mop);CHKERRQ(ierr); 242 ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,mop);CHKERRQ(ierr); 243 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Rootdata\n");CHKERRQ(ierr); 244 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 245 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Leafdata\n");CHKERRQ(ierr); 246 ierr = PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 247 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 248 } 249 250 if (test_reduce) { /* Reduce leafdata into rootdata */ 251 PetscInt *rootdata,*leafdata; 252 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 253 /* Initialize rootdata buffer in which the result of the reduction will appear. */ 254 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 255 for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i; 256 /* Set leaf values to reduce. */ 257 for (i=0; i<nleavesalloc; i++) leafdata[i] = -1; 258 for (i=0; i<nleaves; i++) leafdata[i*stride] = 1000*(rank+1) + 10*i; 259 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata\n");CHKERRQ(ierr); 260 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 261 /* Perform reduction. Computation or other communication can be performed between the begin and end calls. 262 * This example sums the values, but other MPI_Ops can be used (e.g MPI_MAX, MPI_PROD). */ 263 ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,mop);CHKERRQ(ierr); 264 ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,mop);CHKERRQ(ierr); 265 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata\n");CHKERRQ(ierr); 266 ierr = PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 267 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata\n");CHKERRQ(ierr); 268 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 269 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 270 } 271 272 if (test_reduce && test_char) { /* Reduce with signed char */ 273 PetscInt len; 274 char buf[256]; 275 signed char *rootdata,*leafdata; 276 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 277 /* Initialize rootdata buffer in which the result of the reduction will appear. */ 278 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 279 for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i; 280 /* Set leaf values to reduce. */ 281 for (i=0; i<nleavesalloc; i++) leafdata[i] = -1; 282 for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i; 283 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of signed char\n");CHKERRQ(ierr); 284 285 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 286 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]);CHKERRQ(ierr); len += 5;} 287 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 288 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 289 290 /* Using MPI_CHAR should trigger an error since MPI standard does not support reduction on MPI_CHAR. 291 Testing with -test_op max, one can see the sign does take effect in MPI_MAX. 292 */ 293 ierr = PetscSFReduceBegin(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);CHKERRQ(ierr); 294 ierr = PetscSFReduceEnd(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);CHKERRQ(ierr); 295 296 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of signed char\n");CHKERRQ(ierr); 297 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 298 for (i=0; i<nleavesalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5d",leafdata[i]);CHKERRQ(ierr); len += 5;} 299 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 300 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 301 302 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of signed char\n");CHKERRQ(ierr); 303 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 304 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]);CHKERRQ(ierr); len += 5;} 305 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 306 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 307 308 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 309 } 310 311 if (test_reduce && test_char) { /* Reduce with unsigned char */ 312 PetscInt len; 313 char buf[256]; 314 unsigned char *rootdata,*leafdata; 315 ierr = PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);CHKERRQ(ierr); 316 /* Initialize rootdata buffer in which the result of the reduction will appear. */ 317 for (i=0; i<nrootsalloc; i++) rootdata[i] = 0; 318 for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i; 319 /* Set leaf values to reduce. */ 320 for (i=0; i<nleavesalloc; i++) leafdata[i] = 0; 321 for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i; 322 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of unsigned char\n");CHKERRQ(ierr); 323 324 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 325 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]);CHKERRQ(ierr); len += 5;} 326 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 327 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 328 329 /* Using MPI_CHAR should trigger an error since MPI standard does not support reduction on MPI_CHAR. 330 Testing with -test_op max, one can see the sign does take effect in MPI_MAX. 331 */ 332 ierr = PetscSFReduceBegin(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);CHKERRQ(ierr); 334 335 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of unsigned char\n");CHKERRQ(ierr); 336 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 337 for (i=0; i<nleavesalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5u",leafdata[i]);CHKERRQ(ierr); len += 5;} 338 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 339 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 340 341 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of unsigned char\n");CHKERRQ(ierr); 342 len = 0; ierr = PetscSNPrintf(buf,256,"%4d:",rank);CHKERRQ(ierr); len += 5; 343 for (i=0; i<nrootsalloc; i++) {ierr = PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]);CHKERRQ(ierr); len += 5;} 344 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);CHKERRQ(ierr); 345 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); 346 347 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 348 } 349 350 if (test_degree) { 351 const PetscInt *degree; 352 ierr = PetscSFComputeDegreeBegin(sf,°ree);CHKERRQ(ierr); 353 ierr = PetscSFComputeDegreeEnd(sf,°ree);CHKERRQ(ierr); 354 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Root degrees\n");CHKERRQ(ierr); 355 ierr = PetscIntView(nrootsalloc,degree,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 356 } 357 358 if (test_fetchandop) { 359 /* Cannot use text compare here because token ordering is not deterministic */ 360 PetscInt *leafdata,*leafupdate,*rootdata; 361 ierr = PetscMalloc3(nleavesalloc,&leafdata,nleavesalloc,&leafupdate,nrootsalloc,&rootdata);CHKERRQ(ierr); 362 for (i=0; i<nleavesalloc; i++) leafdata[i] = -1; 363 for (i=0; i<nleaves; i++) leafdata[i*stride] = 1; 364 for (i=0; i<nrootsalloc; i++) rootdata[i] = -1; 365 for (i=0; i<nroots; i++) rootdata[i*stride] = 0; 366 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);CHKERRQ(ierr); 367 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);CHKERRQ(ierr); 368 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Rootdata (sum of 1 from each leaf)\n");CHKERRQ(ierr); 369 ierr = PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 370 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Leafupdate (value at roots prior to my atomic update)\n");CHKERRQ(ierr); 371 ierr = PetscIntView(nleavesalloc,leafupdate,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 372 ierr = PetscFree3(leafdata,leafupdate,rootdata);CHKERRQ(ierr); 373 } 374 375 if (test_gather) { 376 const PetscInt *degree; 377 PetscInt inedges,*indata,*outdata; 378 ierr = PetscSFComputeDegreeBegin(sf,°ree);CHKERRQ(ierr); 379 ierr = PetscSFComputeDegreeEnd(sf,°ree);CHKERRQ(ierr); 380 for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i]; 381 ierr = PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);CHKERRQ(ierr); 382 for (i=0; i<nleavesalloc; i++) outdata[i] = -1; 383 for (i=0; i<nleaves; i++) outdata[i*stride] = 1000*(rank+1) + i; 384 ierr = PetscSFGatherBegin(sf,MPIU_INT,outdata,indata);CHKERRQ(ierr); 385 ierr = PetscSFGatherEnd(sf,MPIU_INT,outdata,indata);CHKERRQ(ierr); 386 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Gathered data at multi-roots from leaves\n");CHKERRQ(ierr); 387 ierr = PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 388 ierr = PetscFree2(indata,outdata);CHKERRQ(ierr); 389 } 390 391 if (test_scatter) { 392 const PetscInt *degree; 393 PetscInt j,count,inedges,*indata,*outdata; 394 ierr = PetscSFComputeDegreeBegin(sf,°ree);CHKERRQ(ierr); 395 ierr = PetscSFComputeDegreeEnd(sf,°ree);CHKERRQ(ierr); 396 for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i]; 397 ierr = PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);CHKERRQ(ierr); 398 for (i=0; i<nleavesalloc; i++) outdata[i] = -1; 399 for (i=0,count=0; i<nrootsalloc; i++) { 400 for (j=0; j<degree[i]; j++) indata[count++] = 1000*(rank+1) + 100*i + j; 401 } 402 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Data at multi-roots, to scatter to leaves\n");CHKERRQ(ierr); 403 ierr = PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 404 405 ierr = PetscSFScatterBegin(sf,MPIU_INT,indata,outdata);CHKERRQ(ierr); 406 ierr = PetscSFScatterEnd(sf,MPIU_INT,indata,outdata);CHKERRQ(ierr); 407 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Scattered data at leaves\n");CHKERRQ(ierr); 408 ierr = PetscIntView(nleavesalloc,outdata,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 409 ierr = PetscFree2(indata,outdata);CHKERRQ(ierr); 410 } 411 412 if (test_embed) { 413 const PetscInt nroots = 1 + (PetscInt) (rank == 0); 414 PetscInt selected[2]; 415 PetscSF esf; 416 417 selected[0] = stride; 418 selected[1] = 2*stride; 419 ierr = PetscSFCreateEmbeddedRootSF(sf,nroots,selected,&esf);CHKERRQ(ierr); 420 ierr = PetscSFSetUp(esf);CHKERRQ(ierr); 421 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Embedded PetscSF\n");CHKERRQ(ierr); 422 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr); 423 ierr = PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 424 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 425 ierr = PetscSFDestroy(&esf);CHKERRQ(ierr); 426 } 427 428 if (test_invert) { 429 const PetscInt *degree; 430 PetscInt *mRootsOrigNumbering; 431 PetscInt inedges; 432 PetscSF msf,imsf; 433 434 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 435 ierr = PetscSFCreateInverseSF(msf,&imsf);CHKERRQ(ierr); 436 ierr = PetscSFSetUp(msf);CHKERRQ(ierr); 437 ierr = PetscSFSetUp(imsf);CHKERRQ(ierr); 438 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF\n");CHKERRQ(ierr); 439 ierr = PetscSFView(msf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 440 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF roots indices in original SF roots numbering\n");CHKERRQ(ierr); 441 ierr = PetscSFComputeDegreeBegin(sf,°ree);CHKERRQ(ierr); 442 ierr = PetscSFComputeDegreeEnd(sf,°ree);CHKERRQ(ierr); 443 ierr = PetscSFComputeMultiRootOriginalNumbering(sf,degree,&inedges,&mRootsOrigNumbering);CHKERRQ(ierr); 444 ierr = PetscIntView(inedges,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 445 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF\n");CHKERRQ(ierr); 446 ierr = PetscSFView(imsf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 447 ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF, original numbering\n");CHKERRQ(ierr); 448 ierr = PetscSFViewCustomLocals_Private(imsf,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 449 ierr = PetscSFDestroy(&imsf);CHKERRQ(ierr); 450 ierr = PetscFree(mRootsOrigNumbering);CHKERRQ(ierr); 451 } 452 453 /* Clean storage for star forest. */ 454 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 455 ierr = PetscFinalize(); 456 return ierr; 457 } 458 459 /*TEST 460 461 test: 462 nsize: 4 463 filter: grep -v "type" | grep -v "sort" 464 args: -test_bcast -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 465 requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 466 467 test: 468 suffix: 2 469 nsize: 4 470 filter: grep -v "type" | grep -v "sort" 471 args: -test_reduce -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 472 requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 473 474 test: 475 suffix: 2_basic 476 nsize: 4 477 args: -test_reduce -sf_type basic 478 479 test: 480 suffix: 3 481 nsize: 4 482 filter: grep -v "type" | grep -v "sort" 483 args: -test_degree -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 484 requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 485 486 test: 487 suffix: 3_basic 488 nsize: 4 489 args: -test_degree -sf_type basic 490 491 test: 492 suffix: 4 493 nsize: 4 494 filter: grep -v "type" | grep -v "sort" 495 args: -test_gather -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 496 requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 497 498 test: 499 suffix: 4_basic 500 nsize: 4 501 args: -test_gather -sf_type basic 502 503 test: 504 suffix: 4_stride 505 nsize: 4 506 args: -test_gather -sf_type basic -stride 2 507 508 test: 509 suffix: 5 510 nsize: 4 511 filter: grep -v "type" | grep -v "sort" 512 args: -test_scatter -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 513 requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 514 515 test: 516 suffix: 5_basic 517 nsize: 4 518 args: -test_scatter -sf_type basic 519 520 test: 521 suffix: 5_stride 522 nsize: 4 523 args: -test_scatter -sf_type basic -stride 2 524 525 test: 526 suffix: 6 527 nsize: 4 528 filter: grep -v "type" | grep -v "sort" 529 # No -sf_window_flavor dynamic due to bug https://gitlab.com/petsc/petsc/issues/555 530 args: -test_embed -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}} 531 requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 532 533 test: 534 suffix: 6_basic 535 nsize: 4 536 args: -test_embed -sf_type basic 537 538 test: 539 suffix: 7 540 nsize: 4 541 filter: grep -v "type" | grep -v "sort" 542 args: -test_invert -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 543 requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 544 545 test: 546 suffix: 7_basic 547 nsize: 4 548 args: -test_invert -sf_type basic 549 550 test: 551 suffix: basic 552 nsize: 4 553 args: -test_bcast -sf_type basic 554 output_file: output/ex1_1_basic.out 555 556 test: 557 suffix: bcastop_basic 558 nsize: 4 559 args: -test_bcastop -sf_type basic 560 output_file: output/ex1_bcastop_basic.out 561 562 test: 563 suffix: 8 564 nsize: 3 565 filter: grep -v "type" | grep -v "sort" 566 args: -test_bcast -test_sf_distribute -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}} 567 requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 568 569 test: 570 suffix: 8_basic 571 nsize: 3 572 args: -test_bcast -test_sf_distribute -sf_type basic 573 574 test: 575 suffix: 9_char 576 nsize: 4 577 args: -sf_type basic -test_bcast -test_reduce -test_op max -test_char 578 579 # Here we do not test -sf_window_flavor dynamic since it is designed for repeated SFs with few different rootdata pointers 580 test: 581 suffix: 10 582 filter: grep -v "type" | grep -v "sort" 583 nsize: 4 584 args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}} -test_all -test_bcastop 0 -test_fetchandop 0 585 requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 586 587 # The nightly test suite with MPICH uses ch3:sock, which is broken when winsize == 0 in some of the processes 588 test: 589 suffix: 10_shared 590 output_file: output/ex1_10.out 591 filter: grep -v "type" | grep -v "sort" 592 nsize: 4 593 args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor shared -test_all -test_bcastop 0 -test_fetchandop 0 594 requires: defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) !defined(PETSC_HAVE_MPICH_NUMVERSION) defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 595 596 test: 597 suffix: 10_basic 598 nsize: 4 599 args: -sf_type basic -test_all -test_bcastop 0 -test_fetchandop 0 600 601 TEST*/ 602