1 /* 2 Code for allocating Unix shared memory on MPI rank 0 and later accessing it from other MPI processes 3 */ 4 #include <petscsys.h> 5 6 PetscBool PCMPIServerActive = PETSC_FALSE; // PETSc is running in server mode 7 PetscBool PCMPIServerInSolve = PETSC_FALSE; // A parallel server solve is occurring 8 PetscBool PCMPIServerUseShmget = PETSC_TRUE; // Use Unix shared memory for distributing objects 9 10 #if defined(PETSC_HAVE_SHMGET) 11 #include <sys/shm.h> 12 #include <sys/mman.h> 13 #include <errno.h> 14 15 typedef struct _PetscShmgetAllocation *PetscShmgetAllocation; 16 struct _PetscShmgetAllocation { 17 void *addr; // address on this process; points to same physical address on all processes 18 int shmkey, shmid; 19 size_t sz; 20 PetscShmgetAllocation next; 21 }; 22 static PetscShmgetAllocation allocations = NULL; 23 24 typedef struct { 25 size_t shmkey[3]; 26 size_t sz[3]; 27 } BcastInfo; 28 29 #endif 30 31 /*@C 32 PetscShmgetAddressesFinalize - frees any shared memory that was allocated by `PetscShmgetAllocateArray()` but 33 not deallocated with `PetscShmgetDeallocateArray()` 34 35 Level: developer 36 37 Notes: 38 This prevents any shared memory allocated, but not deallocated, from remaining on the system and preventing 39 its future use. 40 41 If the program crashes outstanding shared memory allocations may remain. 42 43 .seealso: `PetscShmgetAllocateArray()`, `PetscShmgetDeallocateArray()`, `PetscShmgetUnmapAddresses()` 44 @*/ 45 PetscErrorCode PetscShmgetAddressesFinalize(void) 46 { 47 PetscFunctionBegin; 48 #if defined(PETSC_HAVE_SHMGET) 49 PetscShmgetAllocation next = allocations, previous = NULL; 50 51 while (next) { 52 PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory key %d shmid %d %s, see PCMPIServerBegin()", next->shmkey, next->shmid, strerror(errno)); 53 previous = next; 54 next = next->next; 55 PetscCall(PetscFree(previous)); 56 } 57 #endif 58 PetscFunctionReturn(PETSC_SUCCESS); 59 } 60 61 /* takes a void so can work bsan safe with PetscObjectContainerCompose() */ 62 PetscErrorCode PCMPIServerAddressesDestroy(void **ctx) 63 { 64 PCMPIServerAddresses *addresses = (PCMPIServerAddresses *)*ctx; 65 66 PetscFunctionBegin; 67 #if defined(PETSC_HAVE_SHMGET) 68 PetscCall(PetscShmgetUnmapAddresses(addresses->n, addresses->addr)); 69 PetscCall(PetscFree(addresses)); 70 #endif 71 PetscFunctionReturn(PETSC_SUCCESS); 72 } 73 74 /*@C 75 PetscShmgetMapAddresses - given shared address on the first MPI process determines the 76 addresses on the other MPI processes that map to the same physical memory 77 78 Input Parameters: 79 + comm - the `MPI_Comm` to scatter the address 80 . n - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()` 81 - baseaddres - the addresses on the first MPI process, ignored on all but first process 82 83 Output Parameter: 84 . addres - the addresses on each MPI process, the array of void * must already be allocated 85 86 Level: developer 87 88 Note: 89 This routine does nothing if `PETSC_HAVE_SHMGET` is not defined 90 91 .seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetUnmapAddresses()` 92 @*/ 93 PetscErrorCode PetscShmgetMapAddresses(MPI_Comm comm, PetscInt n, const void **baseaddres, void **addres) 94 { 95 PetscFunctionBegin; 96 #if defined(PETSC_HAVE_SHMGET) 97 if (PetscGlobalRank == 0) { 98 BcastInfo bcastinfo = { 99 {0, 0, 0}, 100 {0, 0, 0} 101 }; 102 for (PetscInt i = 0; i < n; i++) { 103 PetscShmgetAllocation allocation = allocations; 104 105 while (allocation) { 106 if (allocation->addr == baseaddres[i]) { 107 bcastinfo.shmkey[i] = allocation->shmkey; 108 bcastinfo.sz[i] = allocation->sz; 109 addres[i] = (void *)baseaddres[i]; 110 break; 111 } 112 allocation = allocation->next; 113 } 114 PetscCheck(allocation, comm, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared address %p, see PCMPIServerBegin()", baseaddres[i]); 115 } 116 PetscCall(PetscInfo(NULL, "Mapping PCMPI Server array %p\n", addres[0])); 117 PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm)); 118 } else { 119 BcastInfo bcastinfo = { 120 {0, 0, 0}, 121 {0, 0, 0} 122 }; 123 int shmkey = 0; 124 size_t sz = 0; 125 126 PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm)); 127 for (PetscInt i = 0; i < n; i++) { 128 PetscShmgetAllocation next = allocations, previous = NULL; 129 130 shmkey = (int)bcastinfo.shmkey[i]; 131 sz = bcastinfo.sz[i]; 132 while (next) { 133 if (next->shmkey == shmkey) addres[i] = next->addr; 134 previous = next; 135 next = next->next; 136 } 137 if (!next) { 138 PetscShmgetAllocation allocation; 139 PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation)); 140 allocation->shmkey = shmkey; 141 allocation->sz = sz; 142 allocation->shmid = shmget(allocation->shmkey, allocation->sz, 0666); 143 PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d of size %d, see PCMPIServerBegin()", allocation->shmkey, (int)allocation->sz); 144 allocation->addr = shmat(allocation->shmid, NULL, 0); 145 PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d, see PCMPIServerBegin()", allocation->shmkey); 146 addres[i] = allocation->addr; 147 if (previous) previous->next = allocation; 148 else allocations = allocation; 149 } 150 } 151 } 152 #endif 153 PetscFunctionReturn(PETSC_SUCCESS); 154 } 155 156 /*@C 157 PetscShmgetUnmapAddresses - given shared addresses on a MPI process unlink it 158 159 Input Parameters: 160 + n - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()` 161 - addres - the addresses 162 163 Level: developer 164 165 Note: 166 This routine does nothing if `PETSC_HAVE_SHMGET` is not defined 167 168 .seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetMapAddresses()` 169 @*/ 170 PetscErrorCode PetscShmgetUnmapAddresses(PetscInt n, void **addres) 171 { 172 PetscFunctionBegin; 173 #if defined(PETSC_HAVE_SHMGET) 174 if (PetscGlobalRank > 0) { 175 for (PetscInt i = 0; i < n; i++) { 176 PetscShmgetAllocation next = allocations, previous = NULL; 177 PetscBool found = PETSC_FALSE; 178 179 while (next) { 180 if (next->addr == addres[i]) { 181 PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno)); 182 if (previous) previous->next = next->next; 183 else allocations = next->next; 184 PetscCall(PetscFree(next)); 185 found = PETSC_TRUE; 186 break; 187 } 188 previous = next; 189 next = next->next; 190 } 191 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to find address %p to unmap, see PCMPIServerBegin()", addres[i]); 192 } 193 } 194 #endif 195 PetscFunctionReturn(PETSC_SUCCESS); 196 } 197 198 /*@C 199 PetscShmgetAllocateArray - allocates shared memory accessible by all MPI processes in the server 200 201 Not Collective, only called on the first MPI process 202 203 Input Parameters: 204 + sz - the number of elements in the array 205 - asz - the size of an entry in the array, for example `sizeof(PetscScalar)` 206 207 Output Parameters: 208 . addr - the address of the array 209 210 Level: developer 211 212 Notes: 213 Uses `PetscMalloc()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running 214 215 Sometimes when a program crashes, shared memory IDs may remain, making it impossible to rerun the program. 216 Use 217 .vb 218 $PETSC_DIR/lib/petsc/bin/petscfreesharedmemory 219 .ve to free that memory. The Linux command `ipcrm --all` or macOS command `for i in $(ipcs -m | tail -$(expr $(ipcs -m | wc -l) - 3) | tr -s ' ' | cut -d" " -f3); do ipcrm -M $i; done` 220 will also free the memory. 221 222 Use the Unix command `ipcs -m` to see what memory IDs are currently allocated and `ipcrm -m ID` to remove a memory ID 223 224 Under Apple macOS the following file must be copied to /Library/LaunchDaemons/sharedmemory.plist (ensure this file is owned by root and not the user) 225 and the machine rebooted before using shared memory 226 .vb 227 <?xml version="1.0" encoding="UTF-8"?> 228 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 229 <plist version="1.0"> 230 <dict> 231 <key>Label</key> 232 <string>shmemsetup</string> 233 <key>UserName</key> 234 <string>root</string> 235 <key>GroupName</key> 236 <string>wheel</string> 237 <key>ProgramArguments</key> 238 <array> 239 <string>/usr/sbin/sysctl</string> 240 <string>-w</string> 241 <string>kern.sysv.shmmax=4194304000</string> 242 <string>kern.sysv.shmmni=2064</string> 243 <string>kern.sysv.shmseg=2064</string> 244 <string>kern.sysv.shmall=131072000</string> 245 </array> 246 <key>KeepAlive</key> 247 <false/> 248 <key>RunAtLoad</key> 249 <true/> 250 </dict> 251 </plist> 252 .ve 253 254 Use the command 255 .vb 256 /usr/sbin/sysctl -a | grep shm 257 .ve 258 to confirm that the shared memory limits you have requested are available. 259 260 Fortran Note: 261 The calling sequence is `PetscShmgetAllocateArray[Scalar,Int](PetscInt start, PetscInt len, Petsc[Scalar,Int], pointer :: d1(:), ierr)` 262 263 Developer Note: 264 More specifically this uses `PetscMalloc()` if `!PCMPIServerUseShmget` || `!PCMPIServerActive` || `PCMPIServerInSolve` 265 where `PCMPIServerInSolve` indicates that the solve is nested inside a MPI linear solver server solve and hence should 266 not allocate the vector and matrix memory in shared memory. 267 268 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetDeallocateArray()` 269 @*/ 270 PetscErrorCode PetscShmgetAllocateArray(size_t sz, size_t asz, void **addr) 271 { 272 PetscFunctionBegin; 273 if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscMalloc(sz * asz, addr)); 274 #if defined(PETSC_HAVE_SHMGET) 275 else { 276 PetscShmgetAllocation allocation; 277 static int shmkeys = 10; 278 279 PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation)); 280 allocation->shmkey = shmkeys++; 281 allocation->sz = sz * asz; 282 allocation->shmid = shmget(allocation->shmkey, allocation->sz, 0666 | IPC_CREAT); 283 PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to schmget() of size %d with key %d %s see PetscShmgetAllocateArray()", (int)allocation->sz, allocation->shmkey, strerror(errno)); 284 allocation->addr = shmat(allocation->shmid, NULL, 0); 285 PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmat() of shmid %d %s", allocation->shmid, strerror(errno)); 286 #if PETSC_SIZEOF_VOID_P == 8 287 PetscCheck((uint64_t)allocation->addr != 0xffffffffffffffff, PETSC_COMM_SELF, PETSC_ERR_LIB, "shmat() of shmid %d returned 0xffffffffffffffff %s, see PCMPIServerBegin()", allocation->shmid, strerror(errno)); 288 #endif 289 290 if (!allocations) allocations = allocation; 291 else { 292 PetscShmgetAllocation next = allocations; 293 while (next->next) next = next->next; 294 next->next = allocation; 295 } 296 *addr = allocation->addr; 297 PetscCall(PetscInfo(NULL, "Allocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, allocation->shmkey, allocation->shmid, (int)allocation->sz)); 298 } 299 #endif 300 PetscFunctionReturn(PETSC_SUCCESS); 301 } 302 303 /*@C 304 PetscShmgetDeallocateArray - deallocates shared memory accessible by all MPI processes in the server 305 306 Not Collective, only called on the first MPI process 307 308 Input Parameter: 309 . addr - the address of array 310 311 Level: developer 312 313 Note: 314 Uses `PetscFree()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running 315 316 Fortran Note: 317 The calling sequence is `PetscShmgetDeallocateArray[Scalar,Int](Petsc[Scalar,Int], pointer :: d1(:), ierr)` 318 319 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetAllocateArray()` 320 @*/ 321 PetscErrorCode PetscShmgetDeallocateArray(void **addr) 322 { 323 PetscFunctionBegin; 324 if (!*addr) PetscFunctionReturn(PETSC_SUCCESS); 325 if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscFree(*addr)); 326 #if defined(PETSC_HAVE_SHMGET) 327 else { 328 PetscShmgetAllocation next = allocations, previous = NULL; 329 330 while (next) { 331 if (next->addr == *addr) { 332 PetscCall(PetscInfo(NULL, "Deallocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, next->shmkey, next->shmid, (int)next->sz)); 333 PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno)); 334 PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory addr %p key %d shmid %d %s, see PCMPIServerBegin()", *addr, next->shmkey, next->shmid, strerror(errno)); 335 *addr = NULL; 336 if (previous) previous->next = next->next; 337 else allocations = next->next; 338 PetscCall(PetscFree(next)); 339 PetscFunctionReturn(PETSC_SUCCESS); 340 } 341 previous = next; 342 next = next->next; 343 } 344 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared memory address %p", *addr); 345 } 346 #endif 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 #if defined(PETSC_USE_FORTRAN_BINDINGS) 351 #include <petsc/private/f90impl.h> 352 353 #if defined(PETSC_HAVE_FORTRAN_CAPS) 354 #define petscshmgetallocatearrayscalar_ PETSCSHMGETALLOCATEARRAYSCALAR 355 #define petscshmgetdeallocatearrayscalar_ PETSCSHMGETDEALLOCATEARRAYSCALAR 356 #define petscshmgetallocatearrayint_ PETSCSHMGETALLOCATEARRAYINT 357 #define petscshmgetdeallocatearrayint_ PETSCSHMGETDEALLOCATEARRAYINT 358 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 359 #define petscshmgetallocatearrayscalar_ petscshmgetallocatearrayscalar 360 #define petscshmgetdeallocatearrayscalar_ petscshmgetdeallocatearrayscalar 361 #define petscshmgetallocatearrayint_ petscshmgetallocatearrayint 362 #define petscshmgetdeallocatearrayint_ petscshmgetdeallocatearrayint 363 #endif 364 365 PETSC_EXTERN void petscshmgetallocatearrayscalar_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd)) 366 { 367 PetscScalar *aa; 368 369 *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscScalar), (void **)&aa); 370 if (*ierr) return; 371 *ierr = F90Array1dCreate(aa, MPIU_SCALAR, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd)); 372 } 373 374 PETSC_EXTERN void petscshmgetdeallocatearrayscalar_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd)) 375 { 376 PetscScalar *aa; 377 378 *ierr = F90Array1dAccess(a, MPIU_SCALAR, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd)); 379 if (*ierr) return; 380 *ierr = PetscShmgetDeallocateArray((void **)&aa); 381 if (*ierr) return; 382 *ierr = F90Array1dDestroy(a, MPIU_SCALAR PETSC_F90_2PTR_PARAM(ptrd)); 383 } 384 385 PETSC_EXTERN void petscshmgetallocatearrayint_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd)) 386 { 387 PetscScalar *aa; 388 389 *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscInt), (void **)&aa); 390 if (*ierr) return; 391 *ierr = F90Array1dCreate(aa, MPIU_INT, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd)); 392 } 393 394 PETSC_EXTERN void petscshmgetdeallocatearrayint_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd)) 395 { 396 PetscScalar *aa; 397 398 *ierr = F90Array1dAccess(a, MPIU_INT, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd)); 399 if (*ierr) return; 400 *ierr = PetscShmgetDeallocateArray((void **)&aa); 401 if (*ierr) return; 402 *ierr = F90Array1dDestroy(a, MPIU_INT PETSC_F90_2PTR_PARAM(ptrd)); 403 } 404 405 #endif 406