1 /*
2 Code for allocating Unix shared memory on MPI rank 0 and later accessing it from other MPI processes
3 */
4 #include <petsc/private/petscimpl.h>
5 #include <petscsys.h>
6
7 PetscBool PCMPIServerActive = PETSC_FALSE; // PETSc is running in server mode
8 PetscBool PCMPIServerInSolve = PETSC_FALSE; // A parallel server solve is occurring
9 PetscBool PCMPIServerUseShmget = PETSC_TRUE; // Use Unix shared memory for distributing objects
10
11 #if defined(PETSC_HAVE_SHMGET)
12 #include <sys/shm.h>
13 #include <sys/mman.h>
14 #include <errno.h>
15
16 typedef struct _PetscShmgetAllocation *PetscShmgetAllocation;
17 struct _PetscShmgetAllocation {
18 void *addr; // address on this process; points to same physical address on all processes
19 int shmkey, shmid;
20 size_t sz;
21 PetscShmgetAllocation next;
22 };
23 static PetscShmgetAllocation allocations = NULL;
24
25 typedef struct {
26 size_t shmkey[3];
27 size_t sz[3];
28 } BcastInfo;
29
30 #endif
31
32 /*@C
33 PetscShmgetAddressesFinalize - frees any shared memory that was allocated by `PetscShmgetAllocateArray()` but
34 not deallocated with `PetscShmgetDeallocateArray()`
35
36 Level: developer
37
38 Notes:
39 This prevents any shared memory allocated, but not deallocated, from remaining on the system and preventing
40 its future use.
41
42 If the program crashes outstanding shared memory allocations may remain.
43
44 .seealso: `PetscShmgetAllocateArray()`, `PetscShmgetDeallocateArray()`, `PetscShmgetUnmapAddresses()`
45 @*/
PetscShmgetAddressesFinalize(void)46 PetscErrorCode PetscShmgetAddressesFinalize(void)
47 {
48 PetscFunctionBegin;
49 #if defined(PETSC_HAVE_SHMGET)
50 PetscShmgetAllocation next = allocations, previous = NULL;
51
52 while (next) {
53 PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory key %d shmid %d %s, see PCMPIServerBegin()", next->shmkey, next->shmid, strerror(errno));
54 previous = next;
55 next = next->next;
56 PetscCall(PetscFree(previous));
57 }
58 #endif
59 PetscFunctionReturn(PETSC_SUCCESS);
60 }
61
/* takes a void * argument so it can be used, BSan safe, with PetscObjectContainerCompose() */
PCMPIServerAddressesDestroy(PetscCtxRt ctx)63 PetscErrorCode PCMPIServerAddressesDestroy(PetscCtxRt ctx)
64 {
65 PCMPIServerAddresses *addresses = *(PCMPIServerAddresses **)ctx;
66
67 PetscFunctionBegin;
68 #if defined(PETSC_HAVE_SHMGET)
69 PetscCall(PetscShmgetUnmapAddresses(addresses->n, addresses->addr));
70 PetscCall(PetscFree(addresses));
71 #else
72 (void)addresses;
73 #endif
74 PetscFunctionReturn(PETSC_SUCCESS);
75 }
76
77 /*@C
78 PetscShmgetMapAddresses - given shared address on the first MPI process determines the
79 addresses on the other MPI processes that map to the same physical memory
80
81 Input Parameters:
82 + comm - the `MPI_Comm` to scatter the address
83 . n - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
84 - baseaddres - the addresses on the first MPI process, ignored on all but first process
85
86 Output Parameter:
87 . addres - the addresses on each MPI process, the array of void * must already be allocated
88
89 Level: developer
90
91 Note:
92 This routine does nothing if `PETSC_HAVE_SHMGET` is not defined
93
94 .seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetUnmapAddresses()`
95 @*/
PetscShmgetMapAddresses(MPI_Comm comm,PetscInt n,const void ** baseaddres,void ** addres)96 PetscErrorCode PetscShmgetMapAddresses(MPI_Comm comm, PetscInt n, const void **baseaddres, void **addres)
97 {
98 PetscFunctionBegin;
99 #if defined(PETSC_HAVE_SHMGET)
100 if (PetscGlobalRank == 0) {
101 BcastInfo bcastinfo = {
102 {0, 0, 0},
103 {0, 0, 0}
104 };
105 for (PetscInt i = 0; i < n; i++) {
106 PetscShmgetAllocation allocation = allocations;
107
108 while (allocation) {
109 if (allocation->addr == baseaddres[i]) {
110 bcastinfo.shmkey[i] = allocation->shmkey;
111 bcastinfo.sz[i] = allocation->sz;
112 addres[i] = (void *)baseaddres[i];
113 break;
114 }
115 allocation = allocation->next;
116 }
117 PetscCheck(allocation, comm, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared address %p, see PCMPIServerBegin()", baseaddres[i]);
118 }
119 PetscCall(PetscInfo(NULL, "Mapping PCMPI Server array %p\n", addres[0]));
120 PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
121 } else {
122 BcastInfo bcastinfo = {
123 {0, 0, 0},
124 {0, 0, 0}
125 };
126 int shmkey = 0;
127 size_t sz = 0;
128
129 PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
130 for (PetscInt i = 0; i < n; i++) {
131 PetscShmgetAllocation next = allocations, previous = NULL;
132
133 shmkey = (int)bcastinfo.shmkey[i];
134 sz = bcastinfo.sz[i];
135 while (next) {
136 if (next->shmkey == shmkey) addres[i] = next->addr;
137 previous = next;
138 next = next->next;
139 }
140 if (!next) {
141 PetscShmgetAllocation allocation;
142 PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
143 allocation->shmkey = shmkey;
144 allocation->sz = sz;
145 allocation->shmid = shmget(allocation->shmkey, allocation->sz, 0666);
146 PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d of size %d, see PCMPIServerBegin()", allocation->shmkey, (int)allocation->sz);
147 allocation->addr = shmat(allocation->shmid, NULL, 0);
148 PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d, see PCMPIServerBegin()", allocation->shmkey);
149 addres[i] = allocation->addr;
150 if (previous) previous->next = allocation;
151 else allocations = allocation;
152 }
153 }
154 }
155 #endif
156 PetscFunctionReturn(PETSC_SUCCESS);
157 }
158
159 /*@C
  PetscShmgetUnmapAddresses - given shared addresses on an MPI process, unmaps them
161
162 Input Parameters:
163 + n - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
164 - addres - the addresses
165
166 Level: developer
167
168 Note:
169 This routine does nothing if `PETSC_HAVE_SHMGET` is not defined
170
171 .seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetMapAddresses()`
172 @*/
PetscShmgetUnmapAddresses(PetscInt n,void ** addres)173 PetscErrorCode PetscShmgetUnmapAddresses(PetscInt n, void **addres) PeNS
174 {
175 PetscFunctionBegin;
176 #if defined(PETSC_HAVE_SHMGET)
177 if (PetscGlobalRank > 0) {
178 for (PetscInt i = 0; i < n; i++) {
179 PetscShmgetAllocation next = allocations, previous = NULL;
180 PetscBool found = PETSC_FALSE;
181
182 while (next) {
183 if (next->addr == addres[i]) {
184 PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
185 if (previous) previous->next = next->next;
186 else allocations = next->next;
187 PetscCall(PetscFree(next));
188 found = PETSC_TRUE;
189 break;
190 }
191 previous = next;
192 next = next->next;
193 }
194 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to find address %p to unmap, see PCMPIServerBegin()", addres[i]);
195 }
196 }
197 #endif
198 PetscFunctionReturn(PETSC_SUCCESS);
199 }
200
201 /*@C
202 PetscShmgetAllocateArray - allocates shared memory accessible by all MPI processes in the server
203
204 Not Collective, only called on the first MPI process
205
206 Input Parameters:
207 + sz - the number of elements in the array
208 - asz - the size of an entry in the array, for example `sizeof(PetscScalar)`
209
210 Output Parameters:
211 . addr - the address of the array
212
213 Level: developer
214
215 Notes:
216 Uses `PetscMalloc()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running
217
218 Sometimes when a program crashes, shared memory IDs may remain, making it impossible to rerun the program.
219 Use
220 .vb
221 $PETSC_DIR/lib/petsc/bin/petscfreesharedmemory.sh
222 .ve to free that memory. The Linux command `ipcrm --all` or macOS command `for i in $(ipcs -m | tail -$(expr $(ipcs -m | wc -l) - 3) | tr -s ' ' | cut -d" " -f3); do ipcrm -M $i; done`
223 will also free the memory.
224
225 Use the Unix command `ipcs -m` to see what memory IDs are currently allocated and `ipcrm -m ID` to remove a memory ID
226
227 Under Apple macOS the following file must be copied to /Library/LaunchDaemons/sharedmemory.plist (ensure this file is owned by root and not the user)
228 and the machine rebooted before using shared memory
229 .vb
230 <?xml version="1.0" encoding="UTF-8"?>
231 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
232 <plist version="1.0">
233 <dict>
234 <key>Label</key>
235 <string>shmemsetup</string>
236 <key>UserName</key>
237 <string>root</string>
238 <key>GroupName</key>
239 <string>wheel</string>
240 <key>ProgramArguments</key>
241 <array>
242 <string>/usr/sbin/sysctl</string>
243 <string>-w</string>
244 <string>kern.sysv.shmmax=4194304000</string>
245 <string>kern.sysv.shmmni=2064</string>
246 <string>kern.sysv.shmseg=2064</string>
247 <string>kern.sysv.shmall=131072000</string>
248 </array>
249 <key>KeepAlive</key>
250 <false/>
251 <key>RunAtLoad</key>
252 <true/>
253 </dict>
254 </plist>
255 .ve
256
257 Use the command
258 .vb
259 /usr/sbin/sysctl -a | grep shm
260 .ve
261 to confirm that the shared memory limits you have requested are available.
262
263 Fortran Note:
264 The calling sequence is `PetscShmgetAllocateArray[Scalar,Int](PetscInt start, PetscInt len, Petsc[Scalar,Int], pointer :: d1(:), ierr)`
265
266 Developer Note:
267 More specifically this uses `PetscMalloc()` if `!PCMPIServerUseShmget` || `!PCMPIServerActive` || `PCMPIServerInSolve`
268 where `PCMPIServerInSolve` indicates that the solve is nested inside a MPI linear solver server solve and hence should
269 not allocate the vector and matrix memory in shared memory.
270
271 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetDeallocateArray()`
272 @*/
PetscShmgetAllocateArray(size_t sz,size_t asz,void * addr[])273 PetscErrorCode PetscShmgetAllocateArray(size_t sz, size_t asz, void *addr[])
274 {
275 PetscFunctionBegin;
276 if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscMalloc(sz * asz, addr));
277 #if defined(PETSC_HAVE_SHMGET)
278 else {
279 PetscShmgetAllocation allocation;
280 static int shmkeys = 10;
281
282 PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
283 allocation->shmkey = shmkeys++;
284 allocation->sz = sz * asz;
285 allocation->shmid = shmget(allocation->shmkey, allocation->sz, 0666 | IPC_CREAT);
286 PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to schmget() of size %d with key %d %s see PetscShmgetAllocateArray()", (int)allocation->sz, allocation->shmkey, strerror(errno));
287 allocation->addr = shmat(allocation->shmid, NULL, 0);
288 PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmat() of shmid %d %s", allocation->shmid, strerror(errno));
289 #if PETSC_SIZEOF_VOID_P == 8
290 PetscCheck((uint64_t)allocation->addr != 0xffffffffffffffff, PETSC_COMM_SELF, PETSC_ERR_LIB, "shmat() of shmid %d returned 0xffffffffffffffff %s, see PCMPIServerBegin()", allocation->shmid, strerror(errno));
291 #endif
292
293 if (!allocations) allocations = allocation;
294 else {
295 PetscShmgetAllocation next = allocations;
296 while (next->next) next = next->next;
297 next->next = allocation;
298 }
299 *addr = allocation->addr;
300 PetscCall(PetscInfo(NULL, "Allocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, allocation->shmkey, allocation->shmid, (int)allocation->sz));
301 }
302 #endif
303 PetscFunctionReturn(PETSC_SUCCESS);
304 }
305
306 /*@C
307 PetscShmgetDeallocateArray - deallocates shared memory accessible by all MPI processes in the server
308
309 Not Collective, only called on the first MPI process
310
311 Input Parameter:
312 . addr - the address of array
313
314 Level: developer
315
316 Note:
317 Uses `PetscFree()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running
318
319 Fortran Note:
320 The calling sequence is `PetscShmgetDeallocateArray[Scalar,Int](Petsc[Scalar,Int], pointer :: d1(:), ierr)`
321
322 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetAllocateArray()`
323 @*/
PetscShmgetDeallocateArray(void * addr[])324 PetscErrorCode PetscShmgetDeallocateArray(void *addr[])
325 {
326 PetscFunctionBegin;
327 if (!*addr) PetscFunctionReturn(PETSC_SUCCESS);
328 if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscFree(*addr));
329 #if defined(PETSC_HAVE_SHMGET)
330 else {
331 PetscShmgetAllocation next = allocations, previous = NULL;
332
333 while (next) {
334 if (next->addr == *addr) {
335 PetscCall(PetscInfo(NULL, "Deallocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, next->shmkey, next->shmid, (int)next->sz));
336 PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
337 PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory addr %p key %d shmid %d %s, see PCMPIServerBegin()", *addr, next->shmkey, next->shmid, strerror(errno));
338 *addr = NULL;
339 if (previous) previous->next = next->next;
340 else allocations = next->next;
341 PetscCall(PetscFree(next));
342 PetscFunctionReturn(PETSC_SUCCESS);
343 }
344 previous = next;
345 next = next->next;
346 }
347 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared memory address %p", *addr);
348 }
349 #endif
350 PetscFunctionReturn(PETSC_SUCCESS);
351 }
352
353 #if defined(PETSC_USE_FORTRAN_BINDINGS)
354 #include <petsc/private/ftnimpl.h>
355
356 #if defined(PETSC_HAVE_FORTRAN_CAPS)
357 #define petscshmgetallocatearrayscalar_ PETSCSHMGETALLOCATEARRAYSCALAR
358 #define petscshmgetdeallocatearrayscalar_ PETSCSHMGETDEALLOCATEARRAYSCALAR
359 #define petscshmgetallocatearrayint_ PETSCSHMGETALLOCATEARRAYINT
360 #define petscshmgetdeallocatearrayint_ PETSCSHMGETDEALLOCATEARRAYINT
361 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
362 #define petscshmgetallocatearrayscalar_ petscshmgetallocatearrayscalar
363 #define petscshmgetdeallocatearrayscalar_ petscshmgetdeallocatearrayscalar
364 #define petscshmgetallocatearrayint_ petscshmgetallocatearrayint
365 #define petscshmgetdeallocatearrayint_ petscshmgetdeallocatearrayint
366 #endif
367
petscshmgetallocatearrayscalar_(PetscInt * start,PetscInt * len,F90Array1d * a,PetscErrorCode * ierr PETSC_F90_2PTR_PROTO (ptrd))368 PETSC_EXTERN void petscshmgetallocatearrayscalar_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
369 {
370 PetscScalar *aa;
371
372 *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscScalar), (void **)&aa);
373 if (*ierr) return;
374 *ierr = F90Array1dCreate(aa, MPIU_SCALAR, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
375 }
376
petscshmgetdeallocatearrayscalar_(F90Array1d * a,PetscErrorCode * ierr PETSC_F90_2PTR_PROTO (ptrd))377 PETSC_EXTERN void petscshmgetdeallocatearrayscalar_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
378 {
379 PetscScalar *aa;
380
381 *ierr = F90Array1dAccess(a, MPIU_SCALAR, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
382 if (*ierr) return;
383 *ierr = PetscShmgetDeallocateArray((void **)&aa);
384 if (*ierr) return;
385 *ierr = F90Array1dDestroy(a, MPIU_SCALAR PETSC_F90_2PTR_PARAM(ptrd));
386 }
387
petscshmgetallocatearrayint_(PetscInt * start,PetscInt * len,F90Array1d * a,PetscErrorCode * ierr PETSC_F90_2PTR_PROTO (ptrd))388 PETSC_EXTERN void petscshmgetallocatearrayint_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
389 {
390 PetscInt *aa;
391
392 *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscInt), (void **)&aa);
393 if (*ierr) return;
394 *ierr = F90Array1dCreate(aa, MPIU_INT, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
395 }
396
petscshmgetdeallocatearrayint_(F90Array1d * a,PetscErrorCode * ierr PETSC_F90_2PTR_PROTO (ptrd))397 PETSC_EXTERN void petscshmgetdeallocatearrayint_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
398 {
399 PetscInt *aa;
400
401 *ierr = F90Array1dAccess(a, MPIU_INT, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
402 if (*ierr) return;
403 *ierr = PetscShmgetDeallocateArray((void **)&aa);
404 if (*ierr) return;
405 *ierr = F90Array1dDestroy(a, MPIU_INT PETSC_F90_2PTR_PARAM(ptrd));
406 }
407
408 #endif
409