xref: /petsc/src/sys/utils/server.c (revision bfe80ac4a46d58cb7760074b25f5e81b2f541d8a)
1 /*
2     Code for allocating Unix shared memory on MPI rank 0 and later accessing it from other MPI processes
3 */
4 #include <petsc/private/petscimpl.h>
5 #include <petscsys.h>
6 
/* Global state flags for the MPI linear solver server; presumably toggled by PCMPIServerBegin()/PCMPIServerEnd() — setters not visible in this file, TODO confirm */
PetscBool PCMPIServerActive    = PETSC_FALSE; // PETSc is running in server mode
PetscBool PCMPIServerInSolve   = PETSC_FALSE; // A parallel server solve is occurring
PetscBool PCMPIServerUseShmget = PETSC_TRUE;  // Use Unix shared memory for distributing objects
10 
#if defined(PETSC_HAVE_SHMGET)
  #include <sys/shm.h>
  #include <sys/mman.h>
  #include <errno.h>

/* One node in the process-local singly linked list tracking each System V shared-memory
   segment this process has created (rank 0) or attached (other ranks) */
typedef struct _PetscShmgetAllocation *PetscShmgetAllocation;
struct _PetscShmgetAllocation {
  void                 *addr;   // address on this process; points to same physical address on all processes
  int                   shmkey, shmid; // System V IPC key the segment was created with, and the id shmget() returned
  size_t                sz;     // size of the segment in bytes
  PetscShmgetAllocation next;   // next node in the list; NULL terminates
};
static PetscShmgetAllocation allocations = NULL; // head of the list of live shared-memory allocations on this process

/* Payload broadcast from rank 0 in PetscShmgetMapAddresses(); fixed arrays mean at most 3 addresses per call */
typedef struct {
  size_t shmkey[3];
  size_t sz[3];
} BcastInfo;

#endif
31 
32 /*@C
33   PetscShmgetAddressesFinalize - frees any shared memory that was allocated by `PetscShmgetAllocateArray()` but
34   not deallocated with `PetscShmgetDeallocateArray()`
35 
36   Level: developer
37 
38   Notes:
39   This prevents any shared memory allocated, but not deallocated, from remaining on the system and preventing
40   its future use.
41 
42   If the program crashes outstanding shared memory allocations may remain.
43 
44 .seealso: `PetscShmgetAllocateArray()`, `PetscShmgetDeallocateArray()`, `PetscShmgetUnmapAddresses()`
45 @*/
46 PetscErrorCode PetscShmgetAddressesFinalize(void)
47 {
48   PetscFunctionBegin;
49 #if defined(PETSC_HAVE_SHMGET)
50   PetscShmgetAllocation next = allocations, previous = NULL;
51 
52   while (next) {
53     PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory key %d shmid %d %s, see PCMPIServerBegin()", next->shmkey, next->shmid, strerror(errno));
54     previous = next;
55     next     = next->next;
56     PetscCall(PetscFree(previous));
57   }
58 #endif
59   PetscFunctionReturn(PETSC_SUCCESS);
60 }
61 
62 /* takes a void so can work bsan safe with PetscObjectContainerCompose() */
63 PetscErrorCode PCMPIServerAddressesDestroy(void **ctx)
64 {
65   PCMPIServerAddresses *addresses = (PCMPIServerAddresses *)*ctx;
66 
67   PetscFunctionBegin;
68 #if defined(PETSC_HAVE_SHMGET)
69   PetscCall(PetscShmgetUnmapAddresses(addresses->n, addresses->addr));
70   PetscCall(PetscFree(addresses));
71 #endif
72   PetscFunctionReturn(PETSC_SUCCESS);
73 }
74 
75 /*@C
76   PetscShmgetMapAddresses - given shared address on the first MPI process determines the
77   addresses on the other MPI processes that map to the same physical memory
78 
79   Input Parameters:
80 + comm       - the `MPI_Comm` to scatter the address
81 . n          - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
82 - baseaddres - the addresses on the first MPI process, ignored on all but first process
83 
84   Output Parameter:
85 . addres - the addresses on each MPI process, the array of void * must already be allocated
86 
87   Level: developer
88 
89   Note:
90   This routine does nothing if `PETSC_HAVE_SHMGET` is not defined
91 
92 .seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetUnmapAddresses()`
93 @*/
94 PetscErrorCode PetscShmgetMapAddresses(MPI_Comm comm, PetscInt n, const void **baseaddres, void **addres)
95 {
96   PetscFunctionBegin;
97 #if defined(PETSC_HAVE_SHMGET)
98   if (PetscGlobalRank == 0) {
99     BcastInfo bcastinfo = {
100       {0, 0, 0},
101       {0, 0, 0}
102     };
103     for (PetscInt i = 0; i < n; i++) {
104       PetscShmgetAllocation allocation = allocations;
105 
106       while (allocation) {
107         if (allocation->addr == baseaddres[i]) {
108           bcastinfo.shmkey[i] = allocation->shmkey;
109           bcastinfo.sz[i]     = allocation->sz;
110           addres[i]           = (void *)baseaddres[i];
111           break;
112         }
113         allocation = allocation->next;
114       }
115       PetscCheck(allocation, comm, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared address %p, see PCMPIServerBegin()", baseaddres[i]);
116     }
117     PetscCall(PetscInfo(NULL, "Mapping PCMPI Server array %p\n", addres[0]));
118     PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
119   } else {
120     BcastInfo bcastinfo = {
121       {0, 0, 0},
122       {0, 0, 0}
123     };
124     int    shmkey = 0;
125     size_t sz     = 0;
126 
127     PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
128     for (PetscInt i = 0; i < n; i++) {
129       PetscShmgetAllocation next = allocations, previous = NULL;
130 
131       shmkey = (int)bcastinfo.shmkey[i];
132       sz     = bcastinfo.sz[i];
133       while (next) {
134         if (next->shmkey == shmkey) addres[i] = next->addr;
135         previous = next;
136         next     = next->next;
137       }
138       if (!next) {
139         PetscShmgetAllocation allocation;
140         PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
141         allocation->shmkey = shmkey;
142         allocation->sz     = sz;
143         allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666);
144         PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d of size %d, see PCMPIServerBegin()", allocation->shmkey, (int)allocation->sz);
145         allocation->addr = shmat(allocation->shmid, NULL, 0);
146         PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d, see PCMPIServerBegin()", allocation->shmkey);
147         addres[i] = allocation->addr;
148         if (previous) previous->next = allocation;
149         else allocations = allocation;
150       }
151     }
152   }
153 #endif
154   PetscFunctionReturn(PETSC_SUCCESS);
155 }
156 
157 /*@C
158   PetscShmgetUnmapAddresses - given shared addresses on a MPI process unlink it
159 
160   Input Parameters:
161 + n      - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
162 - addres - the addresses
163 
164   Level: developer
165 
166   Note:
167   This routine does nothing if `PETSC_HAVE_SHMGET` is not defined
168 
169 .seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetMapAddresses()`
170 @*/
171 PetscErrorCode PetscShmgetUnmapAddresses(PetscInt n, void **addres) PeNS
172 {
173   PetscFunctionBegin;
174 #if defined(PETSC_HAVE_SHMGET)
175   if (PetscGlobalRank > 0) {
176     for (PetscInt i = 0; i < n; i++) {
177       PetscShmgetAllocation next = allocations, previous = NULL;
178       PetscBool             found = PETSC_FALSE;
179 
180       while (next) {
181         if (next->addr == addres[i]) {
182           PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
183           if (previous) previous->next = next->next;
184           else allocations = next->next;
185           PetscCall(PetscFree(next));
186           found = PETSC_TRUE;
187           break;
188         }
189         previous = next;
190         next     = next->next;
191       }
192       PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to find address %p to unmap, see PCMPIServerBegin()", addres[i]);
193     }
194   }
195 #endif
196   PetscFunctionReturn(PETSC_SUCCESS);
197 }
198 
199 /*@C
200   PetscShmgetAllocateArray - allocates shared memory accessible by all MPI processes in the server
201 
202   Not Collective, only called on the first MPI process
203 
204   Input Parameters:
205 + sz  - the number of elements in the array
206 - asz - the size of an entry in the array, for example `sizeof(PetscScalar)`
207 
208   Output Parameters:
209 . addr - the address of the array
210 
211   Level: developer
212 
213   Notes:
214   Uses `PetscMalloc()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running
215 
216   Sometimes when a program crashes, shared memory IDs may remain, making it impossible to rerun the program.
217   Use
218 .vb
219   $PETSC_DIR/lib/petsc/bin/petscfreesharedmemory
220 .ve to free that memory. The Linux command `ipcrm --all` or macOS command `for i in $(ipcs -m | tail -$(expr $(ipcs -m | wc -l) - 3) | tr -s ' ' | cut -d" " -f3); do ipcrm -M $i; done`
221   will also free the memory.
222 
223   Use the Unix command `ipcs -m` to see what memory IDs are currently allocated and `ipcrm -m ID` to remove a memory ID
224 
225   Under Apple macOS the following file must be copied to /Library/LaunchDaemons/sharedmemory.plist (ensure this file is owned by root and not the user)
226   and the machine rebooted before using shared memory
227 .vb
228 <?xml version="1.0" encoding="UTF-8"?>
229 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
230 <plist version="1.0">
231 <dict>
232  <key>Label</key>
233  <string>shmemsetup</string>
234  <key>UserName</key>
235  <string>root</string>
236  <key>GroupName</key>
237  <string>wheel</string>
238  <key>ProgramArguments</key>
239  <array>
240  <string>/usr/sbin/sysctl</string>
241  <string>-w</string>
242  <string>kern.sysv.shmmax=4194304000</string>
243  <string>kern.sysv.shmmni=2064</string>
244  <string>kern.sysv.shmseg=2064</string>
245  <string>kern.sysv.shmall=131072000</string>
246   </array>
247  <key>KeepAlive</key>
248  <false/>
249  <key>RunAtLoad</key>
250  <true/>
251 </dict>
252 </plist>
253 .ve
254 
255   Use the command
256 .vb
257   /usr/sbin/sysctl -a | grep shm
258 .ve
259   to confirm that the shared memory limits you have requested are available.
260 
261   Fortran Note:
262   The calling sequence is `PetscShmgetAllocateArray[Scalar,Int](PetscInt start, PetscInt len, Petsc[Scalar,Int], pointer :: d1(:), ierr)`
263 
264   Developer Note:
265   More specifically this uses `PetscMalloc()` if `!PCMPIServerUseShmget` || `!PCMPIServerActive` || `PCMPIServerInSolve`
266   where `PCMPIServerInSolve` indicates that the solve is nested inside a MPI linear solver server solve and hence should
267   not allocate the vector and matrix memory in shared memory.
268 
269 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetDeallocateArray()`
270 @*/
271 PetscErrorCode PetscShmgetAllocateArray(size_t sz, size_t asz, void *addr[])
272 {
273   PetscFunctionBegin;
274   if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscMalloc(sz * asz, addr));
275 #if defined(PETSC_HAVE_SHMGET)
276   else {
277     PetscShmgetAllocation allocation;
278     static int            shmkeys = 10;
279 
280     PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
281     allocation->shmkey = shmkeys++;
282     allocation->sz     = sz * asz;
283     allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666 | IPC_CREAT);
284     PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to schmget() of size %d with key %d %s see PetscShmgetAllocateArray()", (int)allocation->sz, allocation->shmkey, strerror(errno));
285     allocation->addr = shmat(allocation->shmid, NULL, 0);
286     PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmat() of shmid %d %s", allocation->shmid, strerror(errno));
287   #if PETSC_SIZEOF_VOID_P == 8
288     PetscCheck((uint64_t)allocation->addr != 0xffffffffffffffff, PETSC_COMM_SELF, PETSC_ERR_LIB, "shmat() of shmid %d returned 0xffffffffffffffff %s, see PCMPIServerBegin()", allocation->shmid, strerror(errno));
289   #endif
290 
291     if (!allocations) allocations = allocation;
292     else {
293       PetscShmgetAllocation next = allocations;
294       while (next->next) next = next->next;
295       next->next = allocation;
296     }
297     *addr = allocation->addr;
298     PetscCall(PetscInfo(NULL, "Allocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, allocation->shmkey, allocation->shmid, (int)allocation->sz));
299   }
300 #endif
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 /*@C
305   PetscShmgetDeallocateArray - deallocates shared memory accessible by all MPI processes in the server
306 
307   Not Collective, only called on the first MPI process
308 
309   Input Parameter:
310 . addr - the address of array
311 
312   Level: developer
313 
314   Note:
315   Uses `PetscFree()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running
316 
317   Fortran Note:
318   The calling sequence is `PetscShmgetDeallocateArray[Scalar,Int](Petsc[Scalar,Int], pointer :: d1(:), ierr)`
319 
320 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetAllocateArray()`
321 @*/
322 PetscErrorCode PetscShmgetDeallocateArray(void *addr[])
323 {
324   PetscFunctionBegin;
325   if (!*addr) PetscFunctionReturn(PETSC_SUCCESS);
326   if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscFree(*addr));
327 #if defined(PETSC_HAVE_SHMGET)
328   else {
329     PetscShmgetAllocation next = allocations, previous = NULL;
330 
331     while (next) {
332       if (next->addr == *addr) {
333         PetscCall(PetscInfo(NULL, "Deallocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, next->shmkey, next->shmid, (int)next->sz));
334         PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
335         PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory addr %p key %d shmid %d %s, see PCMPIServerBegin()", *addr, next->shmkey, next->shmid, strerror(errno));
336         *addr = NULL;
337         if (previous) previous->next = next->next;
338         else allocations = next->next;
339         PetscCall(PetscFree(next));
340         PetscFunctionReturn(PETSC_SUCCESS);
341       }
342       previous = next;
343       next     = next->next;
344     }
345     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared memory address %p", *addr);
346   }
347 #endif
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
350 
351 #if defined(PETSC_USE_FORTRAN_BINDINGS)
352   #include <petsc/private/ftnimpl.h>
353 
354   #if defined(PETSC_HAVE_FORTRAN_CAPS)
355     #define petscshmgetallocatearrayscalar_   PETSCSHMGETALLOCATEARRAYSCALAR
356     #define petscshmgetdeallocatearrayscalar_ PETSCSHMGETDEALLOCATEARRAYSCALAR
357     #define petscshmgetallocatearrayint_      PETSCSHMGETALLOCATEARRAYINT
358     #define petscshmgetdeallocatearrayint_    PETSCSHMGETDEALLOCATEARRAYINT
359   #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
360     #define petscshmgetallocatearrayscalar_   petscshmgetallocatearrayscalar
361     #define petscshmgetdeallocatearrayscalar_ petscshmgetdeallocatearrayscalar
362     #define petscshmgetallocatearrayint_      petscshmgetallocatearrayint
363     #define petscshmgetdeallocatearrayint_    petscshmgetdeallocatearrayint
364   #endif
365 
/* Fortran binding: allocate a shared PetscScalar array of length *len and wrap it in an F90 array descriptor with lower bound *start */
PETSC_EXTERN void petscshmgetallocatearrayscalar_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscScalar), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_SCALAR, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}
374 
/* Fortran binding: extract the raw pointer from the F90 descriptor, release the shared PetscScalar array, then destroy the descriptor */
PETSC_EXTERN void petscshmgetdeallocatearrayscalar_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = F90Array1dAccess(a, MPIU_SCALAR, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_SCALAR PETSC_F90_2PTR_PARAM(ptrd));
}
385 
/* Fortran binding: allocate a shared PetscInt array of length *len and wrap it in an F90 array descriptor with lower bound *start */
PETSC_EXTERN void petscshmgetallocatearrayint_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa;

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscInt), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_INT, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}
394 
/* Fortran binding: extract the raw pointer from the F90 descriptor, release the shared PetscInt array, then destroy the descriptor */
PETSC_EXTERN void petscshmgetdeallocatearrayint_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa;

  *ierr = F90Array1dAccess(a, MPIU_INT, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_INT PETSC_F90_2PTR_PARAM(ptrd));
}
405 
406 #endif
407