/*
    /petsc/src/sys/utils/server.c (revision 9281ddf3762f2d5c362e1f7018c73fc774f3a8d2)

    Code for allocating Unix shared memory on MPI rank 0 and later accessing it from other MPI processes
*/
#include <petscsys.h>

PetscBool PCMPIServerActive    = PETSC_FALSE; // PETSc is running in server mode
PetscBool PCMPIServerInSolve   = PETSC_FALSE; // A parallel server solve is occurring
PetscBool PCMPIServerUseShmget = PETSC_TRUE;  // Use Unix shared memory for distributing objects

#if defined(PETSC_HAVE_SHMGET)
  #include <sys/shm.h>
  #include <sys/mman.h>
  #include <errno.h>

typedef struct _PetscShmgetAllocation *PetscShmgetAllocation;
struct _PetscShmgetAllocation {
  void                 *addr; // address on this process; points to same physical address on all processes
  int                   shmkey, shmid;
  size_t                sz;
  PetscShmgetAllocation next;
};
static PetscShmgetAllocation allocations = NULL;

typedef struct {
  size_t shmkey[3];
  size_t sz[3];
} BcastInfo;

#endif

/*@C
  PetscShmgetAddressesFinalize - frees any shared memory that was allocated by `PetscShmgetAllocateArray()` but
  not deallocated with `PetscShmgetDeallocateArray()`

  Level: developer

  Notes:
  This prevents shared memory that was allocated, but never deallocated, from remaining on the system and blocking
  its future use.

  If the program crashes, outstanding shared memory allocations may remain.

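  Example Usage:
  A minimal sketch (assuming the MPI linear solver server is active so the allocation below goes through shmget();
  the array name and length are illustrative only):
.vb
  PetscScalar *work;

  PetscCall(PetscShmgetAllocateArray(100, sizeof(PetscScalar), (void **)&work));
  // ... use work, but never call PetscShmgetDeallocateArray() on it ...
  PetscCall(PetscShmgetAddressesFinalize()); // releases the still-outstanding shared memory segment
.ve
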
.seealso: `PetscShmgetAllocateArray()`, `PetscShmgetDeallocateArray()`, `PetscShmgetUnmapAddresses()`
@*/
PetscErrorCode PetscShmgetAddressesFinalize(void)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  PetscShmgetAllocation next = allocations, previous = NULL;

  while (next) {
    PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory key %d shmid %d %s", next->shmkey, next->shmid, strerror(errno));
    previous = next;
    next     = next->next;
    PetscCall(PetscFree(previous));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Takes a void * argument so it can be used, sanitizer-safe, with PetscObjectContainerCompose() */
PetscErrorCode PCMPIServerAddressesDestroy(void *ctx)
{
  PCMPIServerAddresses *addresses = (PCMPIServerAddresses *)ctx;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  PetscCall(PetscShmgetUnmapAddresses(addresses->n, addresses->addr));
  PetscCall(PetscFree(addresses));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetMapAddresses - given shared addresses on the first MPI process, determines the
  addresses on the other MPI processes that map to the same physical memory

  Input Parameters:
+ comm       - the `MPI_Comm` over which to scatter the addresses
. n          - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
- baseaddres - the addresses on the first MPI process, ignored on all but the first process

  Output Parameter:
. addres - the addresses on each MPI process; the array of void * must already be allocated

  Level: developer

  Note:
  This routine does nothing if `PETSC_HAVE_SHMGET` is not defined

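  Example Usage:
  A minimal sketch of mapping one array allocated on the first MPI process onto every process (error handling and
  the surrounding server setup are omitted; the communicator and length are illustrative assumptions):
.vb
  PetscScalar *a = NULL;
  const void  *base[1];
  void        *addr[1];

  if (PetscGlobalRank == 0) PetscCall(PetscShmgetAllocateArray(100, sizeof(PetscScalar), (void **)&a));
  base[0] = a;
  PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, base, addr));
  a = (PetscScalar *)addr[0]; // on every process a now refers to the same physical memory
  // ... use a ...
  PetscCall(PetscShmgetUnmapAddresses(1, addr));
.ve
  On the first process the underlying segment would subsequently be released with `PetscShmgetDeallocateArray()`.
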
.seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetUnmapAddresses()`
@*/
PetscErrorCode PetscShmgetMapAddresses(MPI_Comm comm, PetscInt n, const void **baseaddres, void **addres)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  if (PetscGlobalRank == 0) {
    BcastInfo bcastinfo = {
      {0, 0, 0},
      {0, 0, 0}
    };
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation allocation = allocations;

      while (allocation) {
        if (allocation->addr == baseaddres[i]) {
          bcastinfo.shmkey[i] = allocation->shmkey;
          bcastinfo.sz[i]     = allocation->sz;
          addres[i]           = (void *)baseaddres[i];
          break;
        }
        allocation = allocation->next;
      }
      PetscCheck(allocation, comm, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared address %p", baseaddres[i]);
    }
    PetscCall(PetscInfo(NULL, "Mapping PCMPI Server array %p\n", addres[0]));
    PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
  } else {
    BcastInfo bcastinfo = {
      {0, 0, 0},
      {0, 0, 0}
    };
    int    shmkey = 0;
    size_t sz     = 0;

    PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation next = allocations, previous = NULL;

      shmkey = (int)bcastinfo.shmkey[i];
      sz     = bcastinfo.sz[i];
      while (next) {
        if (next->shmkey == shmkey) { // already mapped on this process, reuse the existing attachment
          addres[i] = (void *)next->addr;
          break;
        }
        previous = next;
        next     = next->next;
      }
      if (!next) {
        PetscShmgetAllocation allocation;
        PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
        allocation->shmkey = shmkey;
        allocation->sz     = sz;
        allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666);
        PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d of size %d", allocation->shmkey, (int)allocation->sz);
        allocation->addr = shmat(allocation->shmid, NULL, 0);
        PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d", allocation->shmkey);
        addres[i] = allocation->addr;
        if (previous) previous->next = allocation;
        else allocations = allocation;
      }
    }
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetUnmapAddresses - given shared addresses on an MPI process, unmaps them

  Input Parameters:
+ n      - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
- addres - the addresses

  Level: developer

  Note:
  This routine does nothing if `PETSC_HAVE_SHMGET` is not defined

.seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetMapAddresses()`
@*/
PetscErrorCode PetscShmgetUnmapAddresses(PetscInt n, void **addres)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_SHMGET)
  if (PetscGlobalRank > 0) {
    for (PetscInt i = 0; i < n; i++) {
      PetscShmgetAllocation next = allocations, previous = NULL;
      PetscBool             found = PETSC_FALSE;

      while (next) {
        if (next->addr == addres[i]) {
          PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s", strerror(errno));
          if (previous) previous->next = next->next;
          else allocations = next->next;
          PetscCall(PetscFree(next));
          found = PETSC_TRUE;
          break;
        }
        previous = next;
        next     = next->next;
      }
      PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to find address %p to unmap", addres[i]);
    }
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetAllocateArray - allocates shared memory accessible by all MPI processes in the server

  Not Collective, only called on the first MPI process

  Input Parameters:
+ sz  - the number of elements in the array
- asz - the size of an entry in the array, for example `sizeof(PetscScalar)`

  Output Parameter:
. addr - the address of the array

  Level: developer

  Notes:
  Uses `PetscMalloc()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running

  Sometimes when a program crashes, shared memory IDs may remain, making it impossible to rerun the program.
  Use $PETSC_DIR/lib/petsc/bin/petscfreesharedmemory to free that memory

  Use the Unix command `ipcs -m` to see what memory IDs are currently allocated and `ipcrm -m ID` to remove a memory ID

  Use the Unix command `ipcrm --all` or `for i in $(ipcs -m | tail -$(expr $(ipcs -m | wc -l) - 3) | tr -s ' ' | cut -d" " -f3); do ipcrm -M $i; done`
  to delete all the currently allocated memory IDs.

  Under Apple macOS the following file must be copied to /Library/LaunchDaemons/sharedmemory.plist, and the machine rebooted, before using shared memory
.vb
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
 <key>Label</key>
 <string>shmemsetup</string>
 <key>UserName</key>
 <string>root</string>
 <key>GroupName</key>
 <string>wheel</string>
 <key>ProgramArguments</key>
 <array>
 <string>/usr/sbin/sysctl</string>
 <string>-w</string>
 <string>kern.sysv.shmmax=4194304000</string>
 <string>kern.sysv.shmmni=2064</string>
 <string>kern.sysv.shmseg=2064</string>
 <string>kern.sysv.shmall=131072000</string>
 </array>
 <key>KeepAlive</key>
 <false/>
 <key>RunAtLoad</key>
 <true/>
</dict>
</plist>
.ve

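  Example Usage:
  A minimal sketch of pairing the allocation with `PetscShmgetDeallocateArray()` on the first MPI process (the
  length and element type below are illustrative; when the server is not running this reduces to `PetscMalloc()`
  and `PetscFree()`):
.vb
  PetscScalar *values;

  PetscCall(PetscShmgetAllocateArray(100, sizeof(PetscScalar), (void **)&values));
  // ... fill and use values ...
  PetscCall(PetscShmgetDeallocateArray((void **)&values));
.ve
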
  Fortran Note:
  The calling sequence is `PetscShmgetAllocateArray[Scalar,Int](PetscInt start, PetscInt len, Petsc[Scalar,Int], pointer :: d1(:), ierr)`

  Developer Note:
  More specifically, this uses `PetscMalloc()` if `!PCMPIServerUseShmget` || `!PCMPIServerActive` || `PCMPIServerInSolve`
  where `PCMPIServerInSolve` indicates that the solve is nested inside an MPI linear solver server solve and hence should
  not allocate the vector and matrix memory in shared memory.

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetDeallocateArray()`
@*/
PetscErrorCode PetscShmgetAllocateArray(size_t sz, size_t asz, void **addr)
{
  PetscFunctionBegin;
  if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscMalloc(sz * asz, addr));
#if defined(PETSC_HAVE_SHMGET)
  else {
    PetscShmgetAllocation allocation;
    static int            shmkeys = 10;

    PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
    allocation->shmkey = shmkeys++;
    allocation->sz     = sz * asz;
    allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666 | IPC_CREAT);
    PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmget() of size %d with key %d %s see PetscShmgetAllocateArray()", (int)allocation->sz, allocation->shmkey, strerror(errno));
    allocation->addr = shmat(allocation->shmid, NULL, 0);
    PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmat() of shmid %d %s", (int)allocation->shmid, strerror(errno));
  #if PETSC_SIZEOF_VOID_P == 8
    PetscCheck((uint64_t)allocation->addr != 0xffffffffffffffff, PETSC_COMM_SELF, PETSC_ERR_LIB, "shmat() of shmid %d returned 0xffffffffffffffff %s", (int)allocation->shmid, strerror(errno));
  #endif

    if (!allocations) allocations = allocation;
    else {
      PetscShmgetAllocation next = allocations;
      while (next->next) next = next->next;
      next->next = allocation;
    }
    *addr = allocation->addr;
    PetscCall(PetscInfo(NULL, "Allocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, allocation->shmkey, allocation->shmid, (int)allocation->sz));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmgetDeallocateArray - deallocates shared memory accessible by all MPI processes in the server

  Not Collective, only called on the first MPI process

  Input Parameter:
. addr - the address of the array

  Level: developer

  Note:
  Uses `PetscFree()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running

  Fortran Note:
  The calling sequence is `PetscShmgetDeallocateArray[Scalar,Int](Petsc[Scalar,Int], pointer :: d1(:), ierr)`

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetAllocateArray()`
@*/
PetscErrorCode PetscShmgetDeallocateArray(void **addr)
{
  PetscFunctionBegin;
  if (!*addr) PetscFunctionReturn(PETSC_SUCCESS);
  if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscFree(*addr));
#if defined(PETSC_HAVE_SHMGET)
  else {
    PetscShmgetAllocation next = allocations, previous = NULL;

    while (next) {
      if (next->addr == *addr) {
        PetscCall(PetscInfo(NULL, "Deallocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, next->shmkey, next->shmid, (int)next->sz));
        PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory addr %p key %d shmid %d %s", *addr, next->shmkey, next->shmid, strerror(errno));
        *addr = NULL;
        if (previous) previous->next = next->next;
        else allocations = next->next;
        PetscCall(PetscFree(next));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      previous = next;
      next     = next->next;
    }
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared memory address %p", *addr);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_FORTRAN_BINDINGS)
  #include <petsc/private/f90impl.h>

  #if defined(PETSC_HAVE_FORTRAN_CAPS)
    #define petscshmgetallocatearrayscalar_   PETSCSHMGETALLOCATEARRAYSCALAR
    #define petscshmgetdeallocatearrayscalar_ PETSCSHMGETDEALLOCATEARRAYSCALAR
    #define petscshmgetallocatearrayint_      PETSCSHMGETALLOCATEARRAYINT
    #define petscshmgetdeallocatearrayint_    PETSCSHMGETDEALLOCATEARRAYINT
  #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
    #define petscshmgetallocatearrayscalar_   petscshmgetallocatearrayscalar
    #define petscshmgetdeallocatearrayscalar_ petscshmgetdeallocatearrayscalar
    #define petscshmgetallocatearrayint_      petscshmgetallocatearrayint
    #define petscshmgetdeallocatearrayint_    petscshmgetdeallocatearrayint
  #endif

PETSC_EXTERN void petscshmgetallocatearrayscalar_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscScalar), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_SCALAR, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetdeallocatearrayscalar_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscScalar *aa;

  *ierr = F90Array1dAccess(a, MPIU_SCALAR, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_SCALAR PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetallocatearrayint_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa;

  *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscInt), (void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dCreate(aa, MPIU_INT, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
}

PETSC_EXTERN void petscshmgetdeallocatearrayint_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
{
  PetscInt *aa;

  *ierr = F90Array1dAccess(a, MPIU_INT, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
  if (*ierr) return;
  *ierr = PetscShmgetDeallocateArray((void **)&aa);
  if (*ierr) return;
  *ierr = F90Array1dDestroy(a, MPIU_INT PETSC_F90_2PTR_PARAM(ptrd));
}

#endif
397