/*
     Code for allocating Unix shared memory on MPI rank 0 and later accessing it from other MPI processes
*/
#include <petsc/private/petscimpl.h>
#include <petscsys.h>

PetscBool PCMPIServerActive    = PETSC_FALSE; // PETSc is running in server mode
PetscBool PCMPIServerInSolve   = PETSC_FALSE; // A parallel server solve is occurring
PetscBool PCMPIServerUseShmget = PETSC_TRUE;  // Use Unix shared memory for distributing objects

#if defined(PETSC_HAVE_SHMGET)
  #include <sys/shm.h>
  #include <sys/mman.h>
  #include <errno.h>

typedef struct _PetscShmgetAllocation *PetscShmgetAllocation;
struct _PetscShmgetAllocation {
  void                 *addr; // address on this process; points to same physical address on all processes
  int                   shmkey, shmid;
  size_t                sz;
  PetscShmgetAllocation next;
};
static PetscShmgetAllocation allocations = NULL;

typedef struct {
  size_t shmkey[3];
  size_t sz[3];
} BcastInfo;

#endif

/*@C
  PetscShmgetAddressesFinalize - frees any shared memory that was allocated by `PetscShmgetAllocateArray()` but
  not deallocated with `PetscShmgetDeallocateArray()`

  Level: developer

  Notes:
  This prevents any shared memory allocated, but not deallocated, from remaining on the system and preventing
  its future use.

  If the program crashes, outstanding shared memory allocations may remain.

.seealso: `PetscShmgetAllocateArray()`, `PetscShmgetDeallocateArray()`, `PetscShmgetUnmapAddresses()`
@*/
PetscErrorCode PetscShmgetAddressesFinalize(void)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  PetscShmgetAllocation next = allocations, previous = NULL;

  while (next) {
    PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory key %d shmid %d %s, see PCMPIServerBegin()", next->shmkey, next->shmid, strerror(errno));
    previous = next;
    next     = next->next;
    PetscCall(PetscFree(previous));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* takes a void ** so it can be used safely with PetscObjectContainerCompose() */
PetscErrorCode PCMPIServerAddressesDestroy(void **ctx)
{
  PCMPIServerAddresses *addresses = (PCMPIServerAddresses *)*ctx;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  PetscCall(PetscShmgetUnmapAddresses(addresses->n, addresses->addr));
  PetscCall(PetscFree(addresses));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetMapAddresses - given shared addresses on the first MPI process, determines the
  addresses on the other MPI processes that map to the same physical memory

  Input Parameters:
+ comm       - the `MPI_Comm` to scatter the address
. n          - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
- baseaddres - the addresses on the first MPI process, ignored on all but first process

  Output Parameter:
. addres - the addresses on each MPI process, the array of void * must already be allocated

  Level: developer

  Note:
  This routine does nothing if `PETSC_HAVE_SHMGET` is not defined
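
  Example Usage:
  A sketch, assuming the MPI linear solver server is active so the array is placed in Unix shared memory; the communicator `comm`, the variable names, and the length are illustrative
.vb
  void *base[1] = {NULL}, *addr[1];

  if (PetscGlobalRank == 0) PetscCall(PetscShmgetAllocateArray(100, sizeof(PetscScalar), &base[0]));
  PetscCall(PetscShmgetMapAddresses(comm, 1, (const void **)base, addr));
  // addr[0] now refers to the same physical memory on every MPI process in comm
.ve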

.seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetUnmapAddresses()`
@*/
PetscErrorCode PetscShmgetMapAddresses(MPI_Comm comm, PetscInt n, const void **baseaddres, void **addres)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  if (PetscGlobalRank == 0) {
    BcastInfo bcastinfo = {
      {0, 0, 0},
      {0, 0, 0}
    };
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation allocation = allocations;

      while (allocation) {
        if (allocation->addr == baseaddres[i]) {
          bcastinfo.shmkey[i] = allocation->shmkey;
          bcastinfo.sz[i]     = allocation->sz;
          addres[i]           = (void *)baseaddres[i];
          break;
        }
        allocation = allocation->next;
      }
      PetscCheck(allocation, comm, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared address %p, see PCMPIServerBegin()", baseaddres[i]);
    }
    PetscCall(PetscInfo(NULL, "Mapping PCMPI Server array %p\n", addres[0]));
    PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
  } else {
    BcastInfo bcastinfo = {
      {0, 0, 0},
      {0, 0, 0}
    };
    int    shmkey = 0;
    size_t sz     = 0;

    PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation next = allocations, previous = NULL;

      shmkey = (int)bcastinfo.shmkey[i];
      sz     = bcastinfo.sz[i];
      while (next) {
        if (next->shmkey == shmkey) { // already attached on this process, reuse the existing mapping
          addres[i] = next->addr;
          break;
        }
        previous = next;
        next     = next->next;
      }
      if (!next) {
        PetscShmgetAllocation allocation;
        PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
        allocation->shmkey = shmkey;
        allocation->sz     = sz;
        allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666);
        PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d of size %d, see PCMPIServerBegin()", allocation->shmkey, (int)allocation->sz);
        allocation->addr = shmat(allocation->shmid, NULL, 0);
        PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d, see PCMPIServerBegin()", allocation->shmkey);
        addres[i] = allocation->addr;
        if (previous) previous->next = allocation;
        else allocations = allocation;
      }
    }
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetUnmapAddresses - given shared addresses on an MPI process, unmaps them

  Input Parameters:
+ n      - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
- addres - the addresses

  Level: developer

  Note:
  This routine does nothing if `PETSC_HAVE_SHMGET` is not defined

.seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetMapAddresses()`
@*/
PetscErrorCode PetscShmgetUnmapAddresses(PetscInt n, void **addres) PeNS
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  if (PetscGlobalRank > 0) {
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation next  = allocations, previous = NULL;
      PetscBool             found = PETSC_FALSE;

      while (next) {
        if (next->addr == addres[i]) {
          PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
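          /* the segment is detached; remove this allocation record from the per-process list */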
          if (previous) previous->next = next->next;
          else allocations = next->next;
          PetscCall(PetscFree(next));
          found = PETSC_TRUE;
          break;
        }
        previous = next;
        next     = next->next;
      }
      PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to find address %p to unmap, see PCMPIServerBegin()", addres[i]);
    }
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetAllocateArray - allocates shared memory accessible by all MPI processes in the server

  Not Collective, only called on the first MPI process

  Input Parameters:
+ sz  - the number of elements in the array
- asz - the size of an entry in the array, for example `sizeof(PetscScalar)`

  Output Parameter:
. addr - the address of the array

  Level: developer

  Notes:
  Uses `PetscMalloc()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running

  Sometimes when a program crashes, shared memory IDs may remain, making it impossible to rerun the program.
  Use
.vb
  $PETSC_DIR/lib/petsc/bin/petscfreesharedmemory
.ve
  to free that memory. The Linux command `ipcrm --all` or macOS command `for i in $(ipcs -m | tail -$(expr $(ipcs -m | wc -l) - 3) | tr -s ' ' | cut -d" " -f3); do ipcrm -M $i; done`
  will also free the memory.

  Use the Unix command `ipcs -m` to see what memory IDs are currently allocated and `ipcrm -m ID` to remove a memory ID

  Under Apple macOS the following file must be copied to /Library/LaunchDaemons/sharedmemory.plist (ensure this file is owned by root and not the user)
  and the machine rebooted before using shared memory
.vb
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key>
  <string>shmemsetup</string>
  <key>UserName</key>
  <string>root</string>
  <key>GroupName</key>
  <string>wheel</string>
  <key>ProgramArguments</key>
  <array>
    <string>/usr/sbin/sysctl</string>
    <string>-w</string>
    <string>kern.sysv.shmmax=4194304000</string>
    <string>kern.sysv.shmmni=2064</string>
    <string>kern.sysv.shmseg=2064</string>
    <string>kern.sysv.shmall=131072000</string>
  </array>
  <key>KeepAlive</key>
  <false/>
  <key>RunAtLoad</key>
  <true/>
</dict>
</plist>
.ve

  Use the command
.vb
  /usr/sbin/sysctl -a | grep shm
.ve
  to confirm that the shared memory limits you have requested are available.

  Fortran Note:
  The calling sequence is `PetscShmgetAllocateArray[Scalar,Int](PetscInt start, PetscInt len, Petsc[Scalar,Int], pointer :: d1(:), ierr)`

  Developer Note:
  More specifically this uses `PetscMalloc()` if `!PCMPIServerUseShmget` || `!PCMPIServerActive` || `PCMPIServerInSolve`
  where `PCMPIServerInSolve` indicates that the solve is nested inside an MPI linear solver server solve and hence should
  not allocate the vector and matrix memory in shared memory.
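
  Example Usage:
  A sketch of allocating and later freeing an array on the first MPI process, assuming the MPI linear solver server is active; the length and variable name are illustrative
.vb
  PetscScalar *x;

  PetscCall(PetscShmgetAllocateArray(100, sizeof(PetscScalar), (void **)&x));
  // ... use x, for example pass its address to PetscShmgetMapAddresses() so the other MPI processes can access it ...
  PetscCall(PetscShmgetDeallocateArray((void **)&x));
.ve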

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetDeallocateArray()`
@*/
PetscErrorCode PetscShmgetAllocateArray(size_t sz, size_t asz, void *addr[])
{
  PetscFunctionBegin;
  if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscMalloc(sz * asz, addr));
#if defined(PETSC_HAVE_SHMGET)
  else {
    PetscShmgetAllocation allocation;
    static int            shmkeys = 10;

    PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
    allocation->shmkey = shmkeys++;
    allocation->sz     = sz * asz;
    allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666 | IPC_CREAT);
    PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmget() of size %d with key %d %s see PetscShmgetAllocateArray()", (int)allocation->sz, allocation->shmkey, strerror(errno));
    allocation->addr = shmat(allocation->shmid, NULL, 0);
    PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmat() of shmid %d %s", allocation->shmid, strerror(errno));
#if PETSC_SIZEOF_VOID_P == 8
    PetscCheck((uint64_t)allocation->addr != 0xffffffffffffffff, PETSC_COMM_SELF, PETSC_ERR_LIB, "shmat() of shmid %d returned 0xffffffffffffffff %s, see PCMPIServerBegin()", allocation->shmid, strerror(errno));
#endif

    if (!allocations) allocations = allocation;
    else {
      PetscShmgetAllocation next = allocations;
      while (next->next) next = next->next;
      next->next = allocation;
    }
    *addr = allocation->addr;
    PetscCall(PetscInfo(NULL, "Allocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, allocation->shmkey, allocation->shmid, (int)allocation->sz));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetDeallocateArray - deallocates shared memory accessible by all MPI processes in the server

  Not Collective, only called on the first MPI process

  Input Parameter:
. addr - the address of array

  Level: developer

  Note:
  Uses `PetscFree()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running

  Fortran Note:
  The calling sequence is `PetscShmgetDeallocateArray[Scalar,Int](Petsc[Scalar,Int], pointer :: d1(:), ierr)`

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetAllocateArray()`
@*/
PetscErrorCode PetscShmgetDeallocateArray(void *addr[])
{
  PetscFunctionBegin;
  if (!*addr) PetscFunctionReturn(PETSC_SUCCESS);
  if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscFree(*addr));
#if defined(PETSC_HAVE_SHMGET)
  else {
    PetscShmgetAllocation next = allocations, previous = NULL;

    while (next) {
      if (next->addr == *addr) {
        PetscCall(PetscInfo(NULL, "Deallocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, next->shmkey, next->shmid, (int)next->sz));
        PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
        PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory addr %p key %d shmid %d %s, see PCMPIServerBegin()", *addr, next->shmkey, next->shmid, strerror(errno));
        *addr = NULL;
        if (previous) previous->next = next->next;
        else allocations = next->next;
        PetscCall(PetscFree(next));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      previous = next;
      next     = next->next;
    }
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared memory address %p", *addr);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_FORTRAN_BINDINGS)
  #include <petsc/private/ftnimpl.h>

  #if defined(PETSC_HAVE_FORTRAN_CAPS)
    #define petscshmgetallocatearrayscalar_   PETSCSHMGETALLOCATEARRAYSCALAR
    #define petscshmgetdeallocatearrayscalar_ PETSCSHMGETDEALLOCATEARRAYSCALAR
    #define petscshmgetallocatearrayint_      PETSCSHMGETALLOCATEARRAYINT
    #define petscshmgetdeallocatearrayint_    PETSCSHMGETDEALLOCATEARRAYINT
  #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
    #define petscshmgetallocatearrayscalar_   petscshmgetallocatearrayscalar
    #define petscshmgetdeallocatearrayscalar_ petscshmgetdeallocatearrayscalar
    #define petscshmgetallocatearrayint_      petscshmgetallocatearrayint
    #define petscshmgetdeallocatearrayint_    petscshmgetdeallocatearrayint
  #endif

PETSC_EXTERN void petscshmgetallocatearrayscalar_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscScalar), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_SCALAR, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetdeallocatearrayscalar_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = F90Array1dAccess(a, MPIU_SCALAR, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_SCALAR PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetallocatearrayint_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa;

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscInt), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_INT, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetdeallocatearrayint_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa;

  *ierr = F90Array1dAccess(a, MPIU_INT, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_INT PETSC_F90_2PTR_PARAM(ptrd));
}

#endif