static const char help[] = "Test star forest communication (PetscSF)\n\n";

/*T
    Description: A star is a simple tree with one root and zero or more leaves.
    A star forest is a union of disjoint stars.
    Many common communication patterns can be expressed as updates of rootdata using leafdata and vice-versa.
    This example creates a star forest, communicates values using the graph (see options for types of communication), views the graph, then destroys it.
T*/

/*
  Include petscsf.h so we can use PetscSF objects. Note that this automatically
  includes petscsys.h.
*/
#include <petscsf.h>
#include <petscviewer.h>

/* like PetscSFView() but with alternative array of local indices */
static PetscErrorCode PetscSFViewCustomLocals_Private(PetscSF sf,const PetscInt locals[],PetscViewer viewer)
{
  const PetscSFNode *iremote;
  PetscInt          i,nroots,nleaves,nranks;
  PetscMPIInt       rank;

  PetscFunctionBeginUser;
  CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
  CHKERRQ(PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote));
  CHKERRQ(PetscSFGetRootRanks(sf,&nranks,NULL,NULL,NULL,NULL));
  CHKERRQ(PetscViewerASCIIPushTab(viewer));
  CHKERRQ(PetscViewerASCIIPushSynchronized(viewer));
  CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,nroots,nleaves,nranks));
  for (i=0; i<nleaves; i++) {
    CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,locals[i],iremote[i].rank,iremote[i].index));
  }
  CHKERRQ(PetscViewerFlush(viewer));
  CHKERRQ(PetscViewerASCIIPopTab(viewer));
  CHKERRQ(PetscViewerASCIIPopSynchronized(viewer));
  PetscFunctionReturn(0);
}

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       i,nroots,nrootsalloc,nleaves,nleavesalloc,*mine,stride;
  PetscSFNode    *remote;
  PetscMPIInt    rank,size;
  PetscSF        sf;
  PetscBool      test_all,test_bcast,test_bcastop,test_reduce,test_degree,test_fetchandop,test_gather,test_scatter,test_embed,test_invert,test_sf_distribute,test_char;
  MPI_Op         mop=MPI_OP_NULL; /* initialize to prevent compiler warnings with cxx_quad build */
  char           opstring[256];
  PetscBool      strflg;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank));
  CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));

  ierr = PetscOptionsBegin(PETSC_COMM_WORLD,"","PetscSF Test Options","none");CHKERRQ(ierr);
  test_all = PETSC_FALSE;
  CHKERRQ(PetscOptionsBool("-test_all","Test all SF communications","",test_all,&test_all,NULL));
  test_bcast = test_all;
  CHKERRQ(PetscOptionsBool("-test_bcast","Test broadcast","",test_bcast,&test_bcast,NULL));
  test_bcastop = test_all;
  CHKERRQ(PetscOptionsBool("-test_bcastop","Test broadcast and reduce","",test_bcastop,&test_bcastop,NULL));
  test_reduce = test_all;
  CHKERRQ(PetscOptionsBool("-test_reduce","Test reduction","",test_reduce,&test_reduce,NULL));
  test_char = test_all;
  CHKERRQ(PetscOptionsBool("-test_char","Test signed char, unsigned char, and char","",test_char,&test_char,NULL));
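  /* The options below select the MPI reduction operation used by the reduce, bcastop, and fetchandop tests.
     The -test_op string is compared against a fixed list of names ("sum", "prod", "max", ...) and mapped to
     the matching MPI_Op; an unrecognized string leaves the default (sum) in place. */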
  mop = MPI_SUM;
  CHKERRQ(PetscStrcpy(opstring,"sum"));
  CHKERRQ(PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,sizeof(opstring),NULL));
  CHKERRQ(PetscStrcmp("sum",opstring,&strflg));
  if (strflg) {
    mop = MPIU_SUM;
  }
  CHKERRQ(PetscStrcmp("prod",opstring,&strflg));
  if (strflg) {
    mop = MPI_PROD;
  }
  CHKERRQ(PetscStrcmp("max",opstring,&strflg));
  if (strflg) {
    mop = MPI_MAX;
  }
  CHKERRQ(PetscStrcmp("min",opstring,&strflg));
  if (strflg) {
    mop = MPI_MIN;
  }
  CHKERRQ(PetscStrcmp("land",opstring,&strflg));
  if (strflg) {
    mop = MPI_LAND;
  }
  CHKERRQ(PetscStrcmp("band",opstring,&strflg));
  if (strflg) {
    mop = MPI_BAND;
  }
  CHKERRQ(PetscStrcmp("lor",opstring,&strflg));
  if (strflg) {
    mop = MPI_LOR;
  }
  CHKERRQ(PetscStrcmp("bor",opstring,&strflg));
  if (strflg) {
    mop = MPI_BOR;
  }
  CHKERRQ(PetscStrcmp("lxor",opstring,&strflg));
  if (strflg) {
    mop = MPI_LXOR;
  }
  CHKERRQ(PetscStrcmp("bxor",opstring,&strflg));
  if (strflg) {
    mop = MPI_BXOR;
  }
  test_degree = test_all;
  CHKERRQ(PetscOptionsBool("-test_degree","Test computation of vertex degree","",test_degree,&test_degree,NULL));
  test_fetchandop = test_all;
  CHKERRQ(PetscOptionsBool("-test_fetchandop","Test atomic Fetch-And-Op","",test_fetchandop,&test_fetchandop,NULL));
  test_gather = test_all;
  CHKERRQ(PetscOptionsBool("-test_gather","Test point gather","",test_gather,&test_gather,NULL));
  test_scatter = test_all;
  CHKERRQ(PetscOptionsBool("-test_scatter","Test point scatter","",test_scatter,&test_scatter,NULL));
  test_embed = test_all;
  CHKERRQ(PetscOptionsBool("-test_embed","Test point embed","",test_embed,&test_embed,NULL));
  test_invert = test_all;
  CHKERRQ(PetscOptionsBool("-test_invert","Test point invert","",test_invert,&test_invert,NULL));
  stride = 1;
  CHKERRQ(PetscOptionsInt("-stride","Stride for leaf and root data","",stride,&stride,NULL));
  test_sf_distribute = PETSC_FALSE;
  CHKERRQ(PetscOptionsBool("-test_sf_distribute","Create an SF that 'distributes' to each process, like an alltoall","",test_sf_distribute,&test_sf_distribute,NULL));
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  if (test_sf_distribute) {
    nroots       = size;
    nrootsalloc  = size;
    nleaves      = size;
    nleavesalloc = size;
    mine         = NULL;
    CHKERRQ(PetscMalloc1(nleaves,&remote));
    for (i=0; i<size; i++) {
      remote[i].rank  = i;
      remote[i].index = rank;
    }
  } else {
    nroots       = 2 + (PetscInt)(rank == 0);
    nrootsalloc  = nroots * stride;
    nleaves      = 2 + (PetscInt)(rank > 0);
    nleavesalloc = nleaves * stride;
    mine         = NULL;
    if (stride > 1) {
      PetscInt i;

      CHKERRQ(PetscMalloc1(nleaves,&mine));
      for (i = 0; i < nleaves; i++) {
        mine[i] = stride * i;
      }
    }
    CHKERRQ(PetscMalloc1(nleaves,&remote));
    /* Left periodic neighbor */
    remote[0].rank  = (rank+size-1)%size;
    remote[0].index = 1 * stride;
    /* Right periodic neighbor */
    remote[1].rank  = (rank+1)%size;
    remote[1].index = 0 * stride;
    if (rank > 0) { /* All processes other than rank 0 also reference root index 2 (times stride) on rank 0 */
      remote[2].rank  = 0;
      remote[2].index = 2 * stride;
    }
  }

  /* Create a star forest for communication. In this example the leaf space is dense when the stride is 1,
     so we can pass NULL for the local leaf indices; with -stride > 1 the explicit indices built above are used. */
  CHKERRQ(PetscSFCreate(PETSC_COMM_WORLD,&sf));
  CHKERRQ(PetscSFSetFromOptions(sf));
  CHKERRQ(PetscSFSetGraph(sf,nrootsalloc,nleaves,mine,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER));
  CHKERRQ(PetscSFSetUp(sf));

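  /*
     For illustration, with the default (non-distribute) graph and -stride 1, each rank owns two roots
     (rank 0 owns a third), and each rank has two leaves pointing at root 1 of its left periodic neighbor
     and root 0 of its right periodic neighbor; every rank other than 0 has a third leaf pointing at root 2
     on rank 0. On 3 ranks, for example, the leaf <- (rank,root) connections are roughly

       rank 0: 0 <- (2,1), 1 <- (1,0)
       rank 1: 0 <- (0,1), 1 <- (2,0), 2 <- (0,2)
       rank 2: 0 <- (1,1), 1 <- (0,0), 2 <- (0,2)

     This is only a sketch of the construction above; the PetscSFView() call below prints the authoritative graph.
  */
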
  /* View graph, mostly useful for debugging purposes. */
  CHKERRQ(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL));
  CHKERRQ(PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD));
  CHKERRQ(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));

  if (test_bcast) { /* broadcast rootdata into leafdata */
    PetscInt *rootdata,*leafdata;
    /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
     * user-defined structures, could also be used. */
    CHKERRQ(PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata));
    /* Set rootdata buffer to be broadcast */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
    /* Initialize local buffer, these values are never used. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
    CHKERRQ(PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE));
    CHKERRQ(PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata\n"));
    CHKERRQ(PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata\n"));
    CHKERRQ(PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscFree2(rootdata,leafdata));
  }

  if (test_bcast && test_char) { /* Bcast with char */
    PetscInt len;
    char     buf[256];
    char     *rootdata,*leafdata;
    CHKERRQ(PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata));
    /* Set rootdata buffer to be broadcast */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = '*';
    for (i=0; i<nroots; i++) rootdata[i*stride] = 'A' + rank*3 + i; /* rank is very small, so it is fine to compute a char */
    /* Initialize local buffer, these values are never used. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = '?';

    CHKERRQ(PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE));
    CHKERRQ(PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE));

    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata in type of char\n"));
    len = 0; CHKERRQ(PetscSNPrintf(buf,256,"%4d:",rank)); len += 5;
    for (i=0; i<nrootsalloc; i++) {CHKERRQ(PetscSNPrintf(buf+len,256-len,"%5c",rootdata[i])); len += 5;}
    CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf));
    CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT));

    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata in type of char\n"));
    len = 0; CHKERRQ(PetscSNPrintf(buf,256,"%4d:",rank)); len += 5;
    for (i=0; i<nleavesalloc; i++) {CHKERRQ(PetscSNPrintf(buf+len,256-len,"%5c",leafdata[i])); len += 5;}
    CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf));
    CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT));

    CHKERRQ(PetscFree2(rootdata,leafdata));
  }

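  /*
     The next test broadcasts root values but, instead of MPI_REPLACE, combines them into the existing leaf
     buffer with the MPI_Op selected by -test_op, i.e. each leaf ends up holding (old leaf value op root value);
     see PetscSFBcastBegin(). The broadcasts above used MPI_REPLACE, which simply overwrites the leaf buffer.
  */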
  if (test_bcastop) { /* broadcast rootdata and combine it into leafdata with the selected op */
    PetscInt *rootdata,*leafdata;
    /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
     * user-defined structures, could also be used. */
    CHKERRQ(PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata));
    /* Set rootdata buffer to be broadcast */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
    /* Set leaf values to reduce with */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -10*(rank+1) - i;
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-BcastAndOp Leafdata\n"));
    CHKERRQ(PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD));
    /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
    CHKERRQ(PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,mop));
    CHKERRQ(PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,mop));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Rootdata\n"));
    CHKERRQ(PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Leafdata\n"));
    CHKERRQ(PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscFree2(rootdata,leafdata));
  }

  if (test_reduce) { /* Reduce leafdata into rootdata */
    PetscInt *rootdata,*leafdata;
    CHKERRQ(PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata));
    /* Initialize rootdata buffer in which the result of the reduction will appear. */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
    /* Set leaf values to reduce. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 1000*(rank+1) + 10*i;
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata\n"));
    CHKERRQ(PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD));
    /* Perform reduction. Computation or other communication can be performed between the begin and end calls.
     * This example sums the values, but other MPI_Ops can be used (e.g. MPI_MAX, MPI_PROD). */
    CHKERRQ(PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,mop));
    CHKERRQ(PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,mop));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata\n"));
    CHKERRQ(PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata\n"));
    CHKERRQ(PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscFree2(rootdata,leafdata));
  }

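  /*
     In the reductions above and below, each root entry is combined (with the op selected by -test_op) with one
     contribution from every leaf that references it, so a root's result reflects its degree. With the default
     graph, for instance, root index 2*stride on rank 0 receives a contribution from every other rank.
  */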
  if (test_reduce && test_char) { /* Reduce with signed char */
    PetscInt    len;
    char        buf[256];
    signed char *rootdata,*leafdata;
    CHKERRQ(PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata));
    /* Initialize rootdata buffer in which the result of the reduction will appear. */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i;
    /* Set leaf values to reduce. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i;
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of signed char\n"));

    len = 0; CHKERRQ(PetscSNPrintf(buf,256,"%4d:",rank)); len += 5;
    for (i=0; i<nrootsalloc; i++) {CHKERRQ(PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i])); len += 5;}
    CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf));
    CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT));

    /* Using MPI_CHAR would trigger an error since the MPI standard does not support reductions on MPI_CHAR.
       Testing with -test_op max, one can see the sign does take effect in MPI_MAX.
     */
    CHKERRQ(PetscSFReduceBegin(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop));
    CHKERRQ(PetscSFReduceEnd(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop));

    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of signed char\n"));
    len = 0; CHKERRQ(PetscSNPrintf(buf,256,"%4d:",rank)); len += 5;
    for (i=0; i<nleavesalloc; i++) {CHKERRQ(PetscSNPrintf(buf+len,256-len,"%5d",leafdata[i])); len += 5;}
    CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf));
    CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT));

    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of signed char\n"));
    len = 0; CHKERRQ(PetscSNPrintf(buf,256,"%4d:",rank)); len += 5;
    for (i=0; i<nrootsalloc; i++) {CHKERRQ(PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i])); len += 5;}
    CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf));
    CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT));

    CHKERRQ(PetscFree2(rootdata,leafdata));
  }

  if (test_reduce && test_char) { /* Reduce with unsigned char */
    PetscInt      len;
    char          buf[256];
    unsigned char *rootdata,*leafdata;
    CHKERRQ(PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata));
    /* Initialize rootdata buffer in which the result of the reduction will appear. */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = 0;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i;
    /* Set leaf values to reduce. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = 0;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i;
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of unsigned char\n"));

    len = 0; CHKERRQ(PetscSNPrintf(buf,256,"%4d:",rank)); len += 5;
    for (i=0; i<nrootsalloc; i++) {CHKERRQ(PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i])); len += 5;}
    CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf));
    CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT));

    /* Using MPI_CHAR would trigger an error since the MPI standard does not support reductions on MPI_CHAR.
       Testing with -test_op max, one can see the sign does not take effect in MPI_MAX since the data is unsigned.
     */
    CHKERRQ(PetscSFReduceBegin(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop));
    CHKERRQ(PetscSFReduceEnd(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop));

    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of unsigned char\n"));
    len = 0; CHKERRQ(PetscSNPrintf(buf,256,"%4d:",rank)); len += 5;
    for (i=0; i<nleavesalloc; i++) {CHKERRQ(PetscSNPrintf(buf+len,256-len,"%5u",leafdata[i])); len += 5;}
    CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf));
    CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT));

    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of unsigned char\n"));
    len = 0; CHKERRQ(PetscSNPrintf(buf,256,"%4d:",rank)); len += 5;
    for (i=0; i<nrootsalloc; i++) {CHKERRQ(PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i])); len += 5;}
    CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf));
    CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT));

    CHKERRQ(PetscFree2(rootdata,leafdata));
  }

  if (test_degree) {
    const PetscInt *degree;
    CHKERRQ(PetscSFComputeDegreeBegin(sf,&degree));
    CHKERRQ(PetscSFComputeDegreeEnd(sf,&degree));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Root degrees\n"));
    CHKERRQ(PetscIntView(nrootsalloc,degree,PETSC_VIEWER_STDOUT_WORLD));
  }

  if (test_fetchandop) {
    /* Cannot use text compare here because token ordering is not deterministic */
    PetscInt *leafdata,*leafupdate,*rootdata;
    CHKERRQ(PetscMalloc3(nleavesalloc,&leafdata,nleavesalloc,&leafupdate,nrootsalloc,&rootdata));
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 1;
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 0;
    CHKERRQ(PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop));
    CHKERRQ(PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Rootdata (sum of 1 from each leaf)\n"));
    CHKERRQ(PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Leafupdate (value at roots prior to my atomic update)\n"));
    CHKERRQ(PetscIntView(nleavesalloc,leafupdate,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscFree3(leafdata,leafupdate,rootdata));
  }

  if (test_gather) {
    const PetscInt *degree;
    PetscInt       inedges,*indata,*outdata;
    CHKERRQ(PetscSFComputeDegreeBegin(sf,&degree));
    CHKERRQ(PetscSFComputeDegreeEnd(sf,&degree));
    for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
    CHKERRQ(PetscMalloc2(inedges,&indata,nleavesalloc,&outdata));
    for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
    for (i=0; i<nleaves; i++) outdata[i*stride] = 1000*(rank+1) + i;
    CHKERRQ(PetscSFGatherBegin(sf,MPIU_INT,outdata,indata));
    CHKERRQ(PetscSFGatherEnd(sf,MPIU_INT,outdata,indata));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Gathered data at multi-roots from leaves\n"));
    CHKERRQ(PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscFree2(indata,outdata));
  }

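  /*
     Both the gather above and the scatter below use the multi-root layout: every root i owns degree[i]
     consecutive slots (one per incoming leaf edge), so the local multi-root array has length inedges,
     the sum of the degrees over the locally owned roots.
  */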
  if (test_scatter) {
    const PetscInt *degree;
    PetscInt       j,count,inedges,*indata,*outdata;
    CHKERRQ(PetscSFComputeDegreeBegin(sf,&degree));
    CHKERRQ(PetscSFComputeDegreeEnd(sf,&degree));
    for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
    CHKERRQ(PetscMalloc2(inedges,&indata,nleavesalloc,&outdata));
    for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
    for (i=0,count=0; i<nrootsalloc; i++) {
      for (j=0; j<degree[i]; j++) indata[count++] = 1000*(rank+1) + 100*i + j;
    }
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Data at multi-roots, to scatter to leaves\n"));
    CHKERRQ(PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD));

    CHKERRQ(PetscSFScatterBegin(sf,MPIU_INT,indata,outdata));
    CHKERRQ(PetscSFScatterEnd(sf,MPIU_INT,indata,outdata));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Scattered data at leaves\n"));
    CHKERRQ(PetscIntView(nleavesalloc,outdata,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscFree2(indata,outdata));
  }

  if (test_embed) {
    const PetscInt nroots = 1 + (PetscInt) (rank == 0);
    PetscInt       selected[2];
    PetscSF        esf;

    selected[0] = stride;
    selected[1] = 2*stride;
    CHKERRQ(PetscSFCreateEmbeddedRootSF(sf,nroots,selected,&esf));
    CHKERRQ(PetscSFSetUp(esf));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Embedded PetscSF\n"));
    CHKERRQ(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL));
    CHKERRQ(PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscSFDestroy(&esf));
  }

  if (test_invert) {
    const PetscInt *degree;
    PetscInt       *mRootsOrigNumbering;
    PetscInt       inedges;
    PetscSF        msf,imsf;

    CHKERRQ(PetscSFGetMultiSF(sf,&msf));
    CHKERRQ(PetscSFCreateInverseSF(msf,&imsf));
    CHKERRQ(PetscSFSetUp(msf));
    CHKERRQ(PetscSFSetUp(imsf));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF\n"));
    CHKERRQ(PetscSFView(msf,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF roots indices in original SF roots numbering\n"));
    CHKERRQ(PetscSFComputeDegreeBegin(sf,&degree));
    CHKERRQ(PetscSFComputeDegreeEnd(sf,&degree));
    CHKERRQ(PetscSFComputeMultiRootOriginalNumbering(sf,degree,&inedges,&mRootsOrigNumbering));
    CHKERRQ(PetscIntView(inedges,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF\n"));
    CHKERRQ(PetscSFView(imsf,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF, original numbering\n"));
    CHKERRQ(PetscSFViewCustomLocals_Private(imsf,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD));
    CHKERRQ(PetscSFDestroy(&imsf));
    CHKERRQ(PetscFree(mRootsOrigNumbering));
  }

  /* Clean storage for star forest. */
  CHKERRQ(PetscSFDestroy(&sf));
  ierr = PetscFinalize();
  return ierr;
}

/*TEST

   test:
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_bcast -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 2
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_reduce -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 2_basic
      nsize: 4
      args: -test_reduce -sf_type basic

   test:
      suffix: 3
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_degree -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 3_basic
      nsize: 4
      args: -test_degree -sf_type basic

   test:
      suffix: 4
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_gather -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 4_basic
      nsize: 4
      args: -test_gather -sf_type basic

   test:
      suffix: 4_stride
      nsize: 4
      args: -test_gather -sf_type basic -stride 2

   test:
      suffix: 5
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_scatter -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 5_basic
      nsize: 4
      args: -test_scatter -sf_type basic

   test:
      suffix: 5_stride
      nsize: 4
      args: -test_scatter -sf_type basic -stride 2

   test:
      suffix: 6
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      # No -sf_window_flavor dynamic due to bug https://gitlab.com/petsc/petsc/issues/555
      args: -test_embed -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 6_basic
      nsize: 4
      args: -test_embed -sf_type basic

   test:
      suffix: 7
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_invert -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 7_basic
      nsize: 4
      args: -test_invert -sf_type basic

   test:
      suffix: basic
      nsize: 4
      args: -test_bcast -sf_type basic
      output_file: output/ex1_1_basic.out

   test:
      suffix: bcastop_basic
      nsize: 4
      args: -test_bcastop -sf_type basic
      output_file: output/ex1_bcastop_basic.out

   test:
      suffix: 8
      nsize: 3
      filter: grep -v "type" | grep -v "sort"
      args: -test_bcast -test_sf_distribute -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 8_basic
      nsize: 3
      args: -test_bcast -test_sf_distribute -sf_type basic

   test:
      suffix: 9_char
      nsize: 4
      args: -sf_type basic -test_bcast -test_reduce -test_op max -test_char

   # Here we do not test -sf_window_flavor dynamic since it is designed for repeated SFs with few different rootdata pointers
   test:
      suffix: 10
      filter: grep -v "type" | grep -v "sort"
      nsize: 4
      args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}} -test_all -test_bcastop 0 -test_fetchandop 0
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   # The nightly test suite with MPICH uses ch3:sock, which is broken when winsize == 0 in some of the processes
   test:
      suffix: 10_shared
      output_file: output/ex1_10.out
      filter: grep -v "type" | grep -v "sort"
      nsize: 4
      args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor shared -test_all -test_bcastop 0 -test_fetchandop 0
      requires: defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) !defined(PETSC_HAVE_MPICH_NUMVERSION) defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 10_basic
      nsize: 4
      args: -sf_type basic -test_all -test_bcastop 0 -test_fetchandop 0

TEST*/