xref: /petsc/src/sys/utils/server.c (revision 7a5338279d92d13360d231b9bd26d284f35eaa49)
/*
    Code for allocating Unix shared memory on MPI rank 0 and later accessing it from other MPI processes
*/
#include <petscsys.h>

PetscBool PCMPIServerActive    = PETSC_FALSE; // PETSc is running in server mode
PetscBool PCMPIServerInSolve   = PETSC_FALSE; // A parallel server solve is occurring
PetscBool PCMPIServerUseShmget = PETSC_TRUE;  // Use Unix shared memory for distributing objects

#if defined(PETSC_HAVE_SHMGET)
  #include <sys/shm.h>
  #include <sys/mman.h>
  #include <errno.h>

typedef struct _PetscShmgetAllocation *PetscShmgetAllocation;
struct _PetscShmgetAllocation {
  void                 *addr; // address on this process; points to same physical address on all processes
  int                   shmkey, shmid;
  size_t                sz;
  PetscShmgetAllocation next;
};
static PetscShmgetAllocation allocations = NULL;
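
// BcastInfo is broadcast in PetscShmgetMapAddresses() as 6 consecutive MPIU_SIZE_T entries,
// so its layout must remain exactly two arrays of 3 size_t; consequently at most 3 addresses
// can be mapped per call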
typedef struct {
  size_t shmkey[3];
  size_t sz[3];
} BcastInfo;

#endif

/*@C
  PetscShmgetAddressesFinalize - frees any shared memory that was allocated by `PetscShmgetAllocateArray()` but
  not deallocated with `PetscShmgetDeallocateArray()`

  Level: developer

  Notes:
  This releases any shared memory that was allocated but never deallocated, so it does not remain
  on the system and block future use.

  If the program crashes, outstanding shared memory allocations may remain.

.seealso: `PetscShmgetAllocateArray()`, `PetscShmgetDeallocateArray()`, `PetscShmgetUnmapAddresses()`
@*/
PetscErrorCode PetscShmgetAddressesFinalize(void)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  PetscShmgetAllocation next = allocations, previous = NULL;

  while (next) {
    PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory key %d shmid %d %s, see PCMPIServerBegin()", next->shmkey, next->shmid, strerror(errno));
    previous = next;
    next     = next->next;
    PetscCall(PetscFree(previous));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
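
/* A minimal usage sketch (hypothetical, variable names invented for illustration): a segment
   allocated with PetscShmgetAllocateArray() but never deallocated is removed here rather than
   lingering on the system after the program exits.

     PetscScalar *work;
     PetscCall(PetscShmgetAllocateArray(100, sizeof(PetscScalar), (void **)&work));
     // ... use work, but never call PetscShmgetDeallocateArray() on it ...
     PetscCall(PetscShmgetAddressesFinalize()); // frees the leftover segment
*/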

/* takes a void * so it can be used with PetscObjectContainerCompose() */
PetscErrorCode PCMPIServerAddressesDestroy(void **ctx)
{
  PCMPIServerAddresses *addresses = (PCMPIServerAddresses *)*ctx;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  PetscCall(PetscShmgetUnmapAddresses(addresses->n, addresses->addr));
  PetscCall(PetscFree(addresses));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
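
/* Sketch of how this destructor might be attached (hypothetical object name; assumes a
   PetscObjectContainerCompose() calling sequence of object, name, pointer, destroy-function,
   as suggested by the comment above):

     PCMPIServerAddresses *addresses;
     PetscCall(PetscNew(&addresses));
     // ... fill in addresses->n and addresses->addr ...
     PetscCall(PetscObjectContainerCompose((PetscObject)ksp, "PCMPIServerAddresses", addresses, PCMPIServerAddressesDestroy));
*/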

/*@C
  PetscShmgetMapAddresses - given shared addresses on the first MPI process, determines the
  addresses on the other MPI processes that map to the same physical memory

  Input Parameters:
+ comm       - the `MPI_Comm` to scatter the address
. n          - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
- baseaddres - the addresses on the first MPI process, ignored on all but first process

  Output Parameter:
. addres - the addresses on each MPI process; the array of `void *` must already be allocated

  Level: developer

  Note:
  This routine does nothing if `PETSC_HAVE_SHMGET` is not defined

.seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetUnmapAddresses()`
@*/
PetscErrorCode PetscShmgetMapAddresses(MPI_Comm comm, PetscInt n, const void **baseaddres, void **addres)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  if (PetscGlobalRank == 0) {
    BcastInfo bcastinfo = {
      {0, 0, 0},
      {0, 0, 0}
    };
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation allocation = allocations;

      while (allocation) {
        if (allocation->addr == baseaddres[i]) {
          bcastinfo.shmkey[i] = allocation->shmkey;
          bcastinfo.sz[i]     = allocation->sz;
          addres[i]           = (void *)baseaddres[i];
          break;
        }
        allocation = allocation->next;
      }
      PetscCheck(allocation, comm, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared address %p, see PCMPIServerBegin()", baseaddres[i]);
    }
    PetscCall(PetscInfo(NULL, "Mapping PCMPI Server array %p\n", addres[0]));
    PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
  } else {
    BcastInfo bcastinfo = {
      {0, 0, 0},
      {0, 0, 0}
    };
    int    shmkey = 0;
    size_t sz     = 0;

    PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation next = allocations, previous = NULL;

      shmkey = (int)bcastinfo.shmkey[i];
      sz     = bcastinfo.sz[i];
      while (next) {
        if (next->shmkey == shmkey) { // segment already attached on this process; reuse the mapping
          addres[i] = next->addr;
          break;
        }
        previous = next;
        next     = next->next;
      }
      if (!next) {
        PetscShmgetAllocation allocation;
        PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
        allocation->shmkey = shmkey;
        allocation->sz     = sz;
        allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666);
        PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d of size %d, see PCMPIServerBegin()", allocation->shmkey, (int)allocation->sz);
        allocation->addr = shmat(allocation->shmid, NULL, 0);
        // shmat() signals failure by returning (void *)-1, not NULL
        PetscCheck(allocation->addr != (void *)-1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d, see PCMPIServerBegin()", allocation->shmkey);
        addres[i] = allocation->addr;
        if (previous) previous->next = allocation;
        else allocations = allocation;
      }
    }
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
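
/* Usage sketch (hypothetical variable names): MPI process zero allocates a segment, then all
   processes in the communicator attach to the same physical memory.

     void *base[1] = {NULL}, *addr[1];
     if (PetscGlobalRank == 0) PetscCall(PetscShmgetAllocateArray(100, sizeof(PetscScalar), &base[0]));
     PetscCall(PetscShmgetMapAddresses(comm, 1, (const void **)base, addr));
     // addr[0] now refers to the same physical memory on every process of comm
*/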

/*@C
  PetscShmgetUnmapAddresses - given shared addresses on an MPI process, unmaps them

  Input Parameters:
+ n      - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
- addres - the addresses

  Level: developer

  Note:
  This routine does nothing if `PETSC_HAVE_SHMGET` is not defined

.seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetMapAddresses()`
@*/
PetscErrorCode PetscShmgetUnmapAddresses(PetscInt n, void **addres)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  if (PetscGlobalRank > 0) {
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation next = allocations, previous = NULL;
      PetscBool             found = PETSC_FALSE;

      while (next) {
        if (next->addr == addres[i]) {
          PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
          if (previous) previous->next = next->next;
          else allocations = next->next;
          PetscCall(PetscFree(next));
          found = PETSC_TRUE;
          break;
        }
        previous = next;
        next     = next->next;
      }
      PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to find address %p to unmap, see PCMPIServerBegin()", addres[i]);
    }
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
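
/* Continuing the mapping sketch after PetscShmgetMapAddresses() (hypothetical names):
   non-zero ranks detach from the mapped segments when finished; the routine is a no-op on
   MPI process zero, which owns the segments.

     PetscCall(PetscShmgetUnmapAddresses(1, addr)); // detaches addr[0] on ranks > 0
*/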

/*@C
  PetscShmgetAllocateArray - allocates shared memory accessible by all MPI processes in the server

  Not Collective, only called on the first MPI process

  Input Parameters:
+ sz  - the number of elements in the array
- asz - the size of an entry in the array, for example `sizeof(PetscScalar)`

  Output Parameter:
. addr - the address of the array

  Level: developer

  Notes:
  Uses `PetscMalloc()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running

  Sometimes when a program crashes, shared memory IDs may remain, making it impossible to rerun the program.
  Use
.vb
  $PETSC_DIR/lib/petsc/bin/petscfreesharedmemory
.ve
  to free that memory. The Linux command `ipcrm --all` or the macOS command
.vb
  for i in $(ipcs -m | tail -$(expr $(ipcs -m | wc -l) - 3) | tr -s ' ' | cut -d" " -f3); do ipcrm -M $i; done
.ve
  will also free the memory.

  Use the Unix command `ipcs -m` to see what memory IDs are currently allocated and `ipcrm -m ID` to remove a memory ID.

  Under Apple macOS, the following file must be copied to /Library/LaunchDaemons/sharedmemory.plist (ensure this file is owned by root, not the user)
  and the machine rebooted before using shared memory
.vb
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
 <key>Label</key>
 <string>shmemsetup</string>
 <key>UserName</key>
 <string>root</string>
 <key>GroupName</key>
 <string>wheel</string>
 <key>ProgramArguments</key>
 <array>
 <string>/usr/sbin/sysctl</string>
 <string>-w</string>
 <string>kern.sysv.shmmax=4194304000</string>
 <string>kern.sysv.shmmni=2064</string>
 <string>kern.sysv.shmseg=2064</string>
 <string>kern.sysv.shmall=131072000</string>
 </array>
 <key>KeepAlive</key>
 <false/>
 <key>RunAtLoad</key>
 <true/>
</dict>
</plist>
.ve

  Use the command
.vb
  /usr/sbin/sysctl -a | grep shm
.ve
  to confirm that the shared memory limits you have requested are available.

  Fortran Note:
  The calling sequence is `PetscShmgetAllocateArray[Scalar,Int](PetscInt start, PetscInt len, Petsc[Scalar,Int], pointer :: d1(:), ierr)`

  Developer Note:
  More specifically this uses `PetscMalloc()` if `!PCMPIServerUseShmget` || `!PCMPIServerActive` || `PCMPIServerInSolve`
  where `PCMPIServerInSolve` indicates that the solve is nested inside an MPI linear solver server solve and hence should
  not allocate the vector and matrix memory in shared memory.

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetDeallocateArray()`
@*/
PetscErrorCode PetscShmgetAllocateArray(size_t sz, size_t asz, void **addr)
{
  PetscFunctionBegin;
  if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscMalloc(sz * asz, addr));
#if defined(PETSC_HAVE_SHMGET)
  else {
    PetscShmgetAllocation allocation;
    static int            shmkeys = 10;

    PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
    allocation->shmkey = shmkeys++;
    allocation->sz     = sz * asz;
    allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666 | IPC_CREAT);
    PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmget() of size %d with key %d %s, see PetscShmgetAllocateArray()", (int)allocation->sz, allocation->shmkey, strerror(errno));
    allocation->addr = shmat(allocation->shmid, NULL, 0);
    // shmat() signals failure by returning (void *)-1, not NULL
    PetscCheck(allocation->addr != (void *)-1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmat() of shmid %d %s, see PCMPIServerBegin()", allocation->shmid, strerror(errno));

    if (!allocations) allocations = allocation;
    else {
      PetscShmgetAllocation next = allocations;
      while (next->next) next = next->next;
      next->next = allocation;
    }
    *addr = allocation->addr;
    PetscCall(PetscInfo(NULL, "Allocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, allocation->shmkey, allocation->shmid, (int)allocation->sz));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
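
/* Behavior sketch (hypothetical sizes): outside an active MPI linear solver server this
   reduces to ordinary PetscMalloc(), so callers can use one code path either way.

     PetscInt *idx;
     PetscCall(PetscShmgetAllocateArray(32, sizeof(PetscInt), (void **)&idx));
     // server active:   idx points into a System V shared memory segment
     // server inactive: idx came from PetscMalloc()
     PetscCall(PetscShmgetDeallocateArray((void **)&idx));
*/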

/*@C
  PetscShmgetDeallocateArray - deallocates shared memory accessible by all MPI processes in the server

  Not Collective, only called on the first MPI process

  Input Parameter:
. addr - the address of the array

  Level: developer

  Note:
  Uses `PetscFree()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running

  Fortran Note:
  The calling sequence is `PetscShmgetDeallocateArray[Scalar,Int](Petsc[Scalar,Int], pointer :: d1(:), ierr)`

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetAllocateArray()`
@*/
PetscErrorCode PetscShmgetDeallocateArray(void **addr)
{
  PetscFunctionBegin;
  if (!*addr) PetscFunctionReturn(PETSC_SUCCESS);
  if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscFree(*addr));
#if defined(PETSC_HAVE_SHMGET)
  else {
    PetscShmgetAllocation next = allocations, previous = NULL;

    while (next) {
      if (next->addr == *addr) {
        PetscCall(PetscInfo(NULL, "Deallocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, next->shmkey, next->shmid, (int)next->sz));
        PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
        PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory addr %p key %d shmid %d %s, see PCMPIServerBegin()", *addr, next->shmkey, next->shmid, strerror(errno));
        *addr = NULL;
        if (previous) previous->next = next->next;
        else allocations = next->next;
        PetscCall(PetscFree(next));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      previous = next;
      next     = next->next;
    }
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared memory address %p", *addr);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
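
/* Note from the code above (sketch, hypothetical variable): passing the address of a NULL
   pointer is a no-op, and on success the caller's pointer is set to NULL, so a repeated
   deallocation is harmless.

     PetscScalar *v = NULL;
     PetscCall(PetscShmgetDeallocateArray((void **)&v)); // returns immediately
*/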

#if defined(PETSC_USE_FORTRAN_BINDINGS)
  #include <petsc/private/f90impl.h>

  #if defined(PETSC_HAVE_FORTRAN_CAPS)
    #define petscshmgetallocatearrayscalar_   PETSCSHMGETALLOCATEARRAYSCALAR
    #define petscshmgetdeallocatearrayscalar_ PETSCSHMGETDEALLOCATEARRAYSCALAR
    #define petscshmgetallocatearrayint_      PETSCSHMGETALLOCATEARRAYINT
    #define petscshmgetdeallocatearrayint_    PETSCSHMGETDEALLOCATEARRAYINT
  #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
    #define petscshmgetallocatearrayscalar_   petscshmgetallocatearrayscalar
    #define petscshmgetdeallocatearrayscalar_ petscshmgetdeallocatearrayscalar
    #define petscshmgetallocatearrayint_      petscshmgetallocatearrayint
    #define petscshmgetdeallocatearrayint_    petscshmgetdeallocatearrayint
  #endif

PETSC_EXTERN void petscshmgetallocatearrayscalar_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscScalar), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_SCALAR, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetdeallocatearrayscalar_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = F90Array1dAccess(a, MPIU_SCALAR, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_SCALAR PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetallocatearrayint_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa; // the integer variants operate on PetscInt, not PetscScalar

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscInt), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_INT, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetdeallocatearrayint_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa;

  *ierr = F90Array1dAccess(a, MPIU_INT, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_INT PETSC_F90_2PTR_PARAM(ptrd));
}

#endif