/*
   Code for allocating Unix shared memory on MPI rank 0 and later accessing it from other MPI processes
*/
#include <petsc/private/petscimpl.h>
#include <petscsys.h>

PetscBool PCMPIServerActive    = PETSC_FALSE; // PETSc is running in server mode
PetscBool PCMPIServerInSolve   = PETSC_FALSE; // A parallel server solve is occurring
PetscBool PCMPIServerUseShmget = PETSC_TRUE;  // Use Unix shared memory for distributing objects

#if defined(PETSC_HAVE_SHMGET)
#include <sys/shm.h>
#include <sys/mman.h>
#include <errno.h>

typedef struct _PetscShmgetAllocation *PetscShmgetAllocation;
struct _PetscShmgetAllocation {
  void                 *addr; // address on this process; points to same physical address on all processes
  int                   shmkey, shmid;
  size_t                sz;
  PetscShmgetAllocation next;
};
static PetscShmgetAllocation allocations = NULL;

typedef struct {
  size_t shmkey[3];
  size_t sz[3];
} BcastInfo;

#endif

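/*
   Background sketch (standard System V shared memory semantics, summarized here for orientation; nothing
   below is additional PETSc API): the routines in this file are bookkeeping around four system calls, and
   every segment created here is recorded in the `allocations` list above so it can later be found by key,
   detached, and removed.

     int   id = shmget(key, sz, 0666 | IPC_CREAT); // create a segment (rank 0); other ranks look it up without IPC_CREAT
     void *p  = shmat(id, NULL, 0);                // attach it to this process; returns (void *)-1 on failure
     shmdt(p);                                     // detach it from this process; the segment itself survives
     shmctl(id, IPC_RMID, NULL);                   // mark the segment for removal once every process has detached
*/
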
/*@C
  PetscShmgetAddressesFinalize - frees any shared memory that was allocated by `PetscShmgetAllocateArray()` but
  not deallocated with `PetscShmgetDeallocateArray()`

  Level: developer

  Notes:
  This prevents any shared memory allocated, but not deallocated, from remaining on the system and preventing
  its future use.

  If the program crashes, outstanding shared memory allocations may remain.

.seealso: `PetscShmgetAllocateArray()`, `PetscShmgetDeallocateArray()`, `PetscShmgetUnmapAddresses()`
@*/
PetscErrorCode PetscShmgetAddressesFinalize(void)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  PetscShmgetAllocation next = allocations, previous = NULL;

  while (next) {
    PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory key %d shmid %d %s, see PCMPIServerBegin()", next->shmkey, next->shmid, strerror(errno));
    previous = next;
    next     = next->next;
    PetscCall(PetscFree(previous));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* takes a void ** so it can be used, sanitizer-safe, as the destroy function passed to PetscObjectContainerCompose() */
PetscErrorCode PCMPIServerAddressesDestroy(void **ctx)
{
  PCMPIServerAddresses *addresses = (PCMPIServerAddresses *)*ctx;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  PetscCall(PetscShmgetUnmapAddresses(addresses->n, addresses->addr));
  PetscCall(PetscFree(addresses));
#else
  (void)addresses;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetMapAddresses - given shared addresses on the first MPI process, determines the
  addresses on the other MPI processes that map to the same physical memory

  Input Parameters:
+ comm       - the `MPI_Comm` over which to scatter the addresses
. n          - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
- baseaddres - the addresses on the first MPI process, ignored on all but the first process

  Output Parameter:
. addres - the addresses on each MPI process; the array of `void *` must already be allocated

  Level: developer

  Note:
  This routine does nothing if `PETSC_HAVE_SHMGET` is not defined

.seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetUnmapAddresses()`
@*/
PetscErrorCode PetscShmgetMapAddresses(MPI_Comm comm, PetscInt n, const void **baseaddres, void **addres)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  if (PetscGlobalRank == 0) {
    BcastInfo bcastinfo = {
      {0, 0, 0},
      {0, 0, 0}
    };

    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation allocation = allocations;

      while (allocation) {
        if (allocation->addr == baseaddres[i]) {
          bcastinfo.shmkey[i] = allocation->shmkey;
          bcastinfo.sz[i]     = allocation->sz;
          addres[i]           = (void *)baseaddres[i];
          break;
        }
        allocation = allocation->next;
      }
      PetscCheck(allocation, comm, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared address %p, see PCMPIServerBegin()", baseaddres[i]);
    }
    PetscCall(PetscInfo(NULL, "Mapping PCMPI Server array %p\n", addres[0]));
    // BcastInfo holds two 3-element arrays broadcast as 6 size_t values, so at most 3 addresses can be mapped per call
    PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
  } else {
    BcastInfo bcastinfo = {
      {0, 0, 0},
      {0, 0, 0}
    };
    int    shmkey = 0;
    size_t sz     = 0;

    PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation next = allocations, previous = NULL;

      shmkey = (int)bcastinfo.shmkey[i];
      sz     = bcastinfo.sz[i];
      while (next) {
        if (next->shmkey == shmkey) {
          addres[i] = next->addr; // this key is already attached on this process, reuse the existing mapping
          break;
        }
        previous = next;
        next     = next->next;
      }
      if (!next) {
        PetscShmgetAllocation allocation;

        PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
        allocation->shmkey = shmkey;
        allocation->sz     = sz;
        allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666);
        PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d of size %d, see PCMPIServerBegin()", allocation->shmkey, (int)allocation->sz);
        allocation->addr = shmat(allocation->shmid, NULL, 0);
        PetscCheck(allocation->addr != (void *)-1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d, see PCMPIServerBegin()", allocation->shmkey);
        addres[i] = allocation->addr;
        if (previous) previous->next = allocation;
        else allocations = allocation;
      }
    }
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

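/*
   Usage sketch (illustrative only; the array length, the variable names, and the communicator `comm` are
   hypothetical): the typical life cycle of a shared array in the MPI linear solver server. Rank 0 allocates,
   every rank then maps the same physical memory, ranks > 0 detach when finished, and rank 0 removes the segment.

     PetscScalar *array = NULL;
     void        *base[1], *addr[1];

     if (PetscGlobalRank == 0) PetscCall(PetscShmgetAllocateArray(100, sizeof(PetscScalar), (void **)&array));
     base[0] = (void *)array;                                                // only meaningful on rank 0
     PetscCall(PetscShmgetMapAddresses(comm, 1, (const void **)base, addr)); // collective on comm
     array = (PetscScalar *)addr[0];                                         // same physical memory on every rank
     // ... use array ...
     PetscCall(PetscShmgetUnmapAddresses(1, addr));                          // detaches only on ranks > 0
     if (PetscGlobalRank == 0) PetscCall(PetscShmgetDeallocateArray((void **)&array));
*/
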
/*@C
  PetscShmgetUnmapAddresses - given shared addresses on an MPI process, unmaps (detaches) them

  Input Parameters:
+ n      - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
- addres - the addresses

  Level: developer

  Note:
  This routine does nothing if `PETSC_HAVE_SHMGET` is not defined

.seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetMapAddresses()`
@*/
PetscErrorCode PetscShmgetUnmapAddresses(PetscInt n, void **addres) PeNS
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  if (PetscGlobalRank > 0) { // rank 0 keeps its attachment; it is removed by PetscShmgetDeallocateArray() or PetscShmgetAddressesFinalize()
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation next = allocations, previous = NULL;
      PetscBool             found = PETSC_FALSE;

      while (next) {
        if (next->addr == addres[i]) {
          PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
          if (previous) previous->next = next->next;
          else allocations = next->next;
          PetscCall(PetscFree(next));
          found = PETSC_TRUE;
          break;
        }
        previous = next;
        next     = next->next;
      }
      PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to find address %p to unmap, see PCMPIServerBegin()", addres[i]);
    }
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetAllocateArray - allocates shared memory accessible by all MPI processes in the server

  Not Collective, only called on the first MPI process

  Input Parameters:
+ sz  - the number of elements in the array
- asz - the size of an entry in the array, for example `sizeof(PetscScalar)`

  Output Parameter:
. addr - the address of the array

  Level: developer

  Notes:
  Uses `PetscMalloc()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running

  Sometimes when a program crashes, shared memory IDs may remain, making it impossible to rerun the program.
  Use
.vb
  $PETSC_DIR/lib/petsc/bin/petscfreesharedmemory
.ve
  to free that memory. The Linux command `ipcrm --all` or the macOS command
  `for i in $(ipcs -m | tail -$(expr $(ipcs -m | wc -l) - 3) | tr -s ' ' | cut -d" " -f3); do ipcrm -M $i; done`
  will also free the memory.

  Use the Unix command `ipcs -m` to see what memory IDs are currently allocated and `ipcrm -m ID` to remove a memory ID.

  Under Apple macOS the following file must be copied to /Library/LaunchDaemons/sharedmemory.plist (ensure this file is owned by root and not the user)
  and the machine rebooted before using shared memory
.vb
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
  <dict>
    <key>Label</key>
    <string>shmemsetup</string>
    <key>UserName</key>
    <string>root</string>
    <key>GroupName</key>
    <string>wheel</string>
    <key>ProgramArguments</key>
    <array>
      <string>/usr/sbin/sysctl</string>
      <string>-w</string>
      <string>kern.sysv.shmmax=4194304000</string>
      <string>kern.sysv.shmmni=2064</string>
      <string>kern.sysv.shmseg=2064</string>
      <string>kern.sysv.shmall=131072000</string>
    </array>
    <key>KeepAlive</key>
    <false/>
    <key>RunAtLoad</key>
    <true/>
  </dict>
</plist>
.ve

  Use the command
.vb
  /usr/sbin/sysctl -a | grep shm
.ve
  to confirm that the shared memory limits you have requested are available.

  Fortran Note:
  The calling sequence is `PetscShmgetAllocateArray[Scalar,Int](PetscInt start, PetscInt len, Petsc[Scalar,Int], pointer :: d1(:), ierr)`

  Developer Note:
  More specifically this uses `PetscMalloc()` if `!PCMPIServerUseShmget` || `!PCMPIServerActive` || `PCMPIServerInSolve`
  where `PCMPIServerInSolve` indicates that the solve is nested inside an MPI linear solver server solve and hence should
  not allocate the vector and matrix memory in shared memory.

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetDeallocateArray()`
@*/
PetscErrorCode PetscShmgetAllocateArray(size_t sz, size_t asz, void *addr[])
{
  PetscFunctionBegin;
  if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscMalloc(sz * asz, addr));
#if defined(PETSC_HAVE_SHMGET)
  else {
    PetscShmgetAllocation allocation;
    static int            shmkeys = 10;

    PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
    allocation->shmkey = shmkeys++;
    allocation->sz     = sz * asz;
    allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666 | IPC_CREAT);
    PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmget() of size %d with key %d %s, see PetscShmgetAllocateArray()", (int)allocation->sz, allocation->shmkey, strerror(errno));
    allocation->addr = shmat(allocation->shmid, NULL, 0);
    PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmat() of shmid %d %s", allocation->shmid, strerror(errno));
#if PETSC_SIZEOF_VOID_P == 8
    PetscCheck((uint64_t)allocation->addr != 0xffffffffffffffff, PETSC_COMM_SELF, PETSC_ERR_LIB, "shmat() of shmid %d returned 0xffffffffffffffff %s, see PCMPIServerBegin()", allocation->shmid, strerror(errno));
#endif

    if (!allocations) allocations = allocation;
    else {
      PetscShmgetAllocation next = allocations;

      while (next->next) next = next->next;
      next->next = allocation;
    }
    *addr = allocation->addr;
    PetscCall(PetscInfo(NULL, "Allocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, allocation->shmkey, allocation->shmid, (int)allocation->sz));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

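/*
   Removal semantics (standard System V behavior, noted here as background rather than PETSc-specific API):
   the shmctl(IPC_RMID) calls in PetscShmgetDeallocateArray() and PetscShmgetAddressesFinalize() only mark a
   segment for destruction; the kernel reclaims the memory after the last attached process has called shmdt()
   or exited. Assuming every rank holds a mapping, the teardown order used by the server is therefore

     // ranks > 0 : PetscShmgetUnmapAddresses()  -> shmdt()                     detach
     // rank 0    : PetscShmgetDeallocateArray() -> shmdt() + shmctl(IPC_RMID)  detach and mark for removal
*/
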
/*@C
  PetscShmgetDeallocateArray - deallocates shared memory accessible by all MPI processes in the server

  Not Collective, only called on the first MPI process

  Input Parameter:
. addr - the address of the array

  Level: developer

  Note:
  Uses `PetscFree()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running

  Fortran Note:
  The calling sequence is `PetscShmgetDeallocateArray[Scalar,Int](Petsc[Scalar,Int], pointer :: d1(:), ierr)`

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetAllocateArray()`
@*/
PetscErrorCode PetscShmgetDeallocateArray(void *addr[])
{
  PetscFunctionBegin;
  if (!*addr) PetscFunctionReturn(PETSC_SUCCESS);
  if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscFree(*addr));
#if defined(PETSC_HAVE_SHMGET)
  else {
    PetscShmgetAllocation next = allocations, previous = NULL;

    while (next) {
      if (next->addr == *addr) {
        PetscCall(PetscInfo(NULL, "Deallocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, next->shmkey, next->shmid, (int)next->sz));
        PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
        PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory addr %p key %d shmid %d %s, see PCMPIServerBegin()", *addr, next->shmkey, next->shmid, strerror(errno));
        *addr = NULL;
        if (previous) previous->next = next->next;
        else allocations = next->next;
        PetscCall(PetscFree(next));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      previous = next;
      next     = next->next;
    }
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared memory address %p", *addr);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

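/*
   The Fortran stubs below implement the calling sequences given in the Fortran Notes above: each allocate
   wrapper obtains the raw C array with PetscShmgetAllocateArray() and wraps it in an F90 array descriptor
   with F90Array1dCreate(); the deallocate wrappers reverse the steps (access the array, free it, destroy
   the descriptor).
*/
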
#if defined(PETSC_USE_FORTRAN_BINDINGS)
#include <petsc/private/ftnimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define petscshmgetallocatearrayscalar_   PETSCSHMGETALLOCATEARRAYSCALAR
#define petscshmgetdeallocatearrayscalar_ PETSCSHMGETDEALLOCATEARRAYSCALAR
#define petscshmgetallocatearrayint_      PETSCSHMGETALLOCATEARRAYINT
#define petscshmgetdeallocatearrayint_    PETSCSHMGETDEALLOCATEARRAYINT
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define petscshmgetallocatearrayscalar_   petscshmgetallocatearrayscalar
#define petscshmgetdeallocatearrayscalar_ petscshmgetdeallocatearrayscalar
#define petscshmgetallocatearrayint_      petscshmgetallocatearrayint
#define petscshmgetdeallocatearrayint_    petscshmgetdeallocatearrayint
#endif

PETSC_EXTERN void petscshmgetallocatearrayscalar_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscScalar), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_SCALAR, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetdeallocatearrayscalar_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = F90Array1dAccess(a, MPIU_SCALAR, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_SCALAR PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetallocatearrayint_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa;

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscInt), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_INT, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetdeallocatearrayint_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa;

  *ierr = F90Array1dAccess(a, MPIU_INT, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_INT PETSC_F90_2PTR_PARAM(ptrd));
}

#endif