xref: /petsc/src/sys/utils/server.c (revision 4e8208cbcbc709572b8abe32f33c78b69c819375)
1 /*
2     Code for allocating Unix shared memory on MPI rank 0 and later accessing it from other MPI processes
3 */
4 #include <petsc/private/petscimpl.h>
5 #include <petscsys.h>
6 
7 PetscBool PCMPIServerActive    = PETSC_FALSE; // PETSc is running in server mode
8 PetscBool PCMPIServerInSolve   = PETSC_FALSE; // A parallel server solve is occurring
9 PetscBool PCMPIServerUseShmget = PETSC_TRUE;  // Use Unix shared memory for distributing objects
10 
#if defined(PETSC_HAVE_SHMGET)
  #include <sys/shm.h>
  #include <sys/mman.h>
  #include <errno.h>

/* One node in the per-process singly linked list of live System V shared-memory segments */
typedef struct _PetscShmgetAllocation *PetscShmgetAllocation;
struct _PetscShmgetAllocation {
  void                 *addr; // address on this process; points to same physical address on all processes
  int                   shmkey, shmid; // IPC key chosen by PetscShmgetAllocateArray() and the id returned by shmget()
  size_t                sz;            // size of the segment in bytes
  PetscShmgetAllocation next;          // next node in the list; NULL terminates
};
static PetscShmgetAllocation allocations = NULL; // head of this process's allocation list

/* Payload broadcast from MPI rank 0 in PetscShmgetMapAddresses(); fixed arrays limit a
   single call to at most 3 addresses (broadcast as 6 MPIU_SIZE_T values) */
typedef struct {
  size_t shmkey[3];
  size_t sz[3];
} BcastInfo;

#endif
31 
32 /*@C
33   PetscShmgetAddressesFinalize - frees any shared memory that was allocated by `PetscShmgetAllocateArray()` but
34   not deallocated with `PetscShmgetDeallocateArray()`
35 
36   Level: developer
37 
38   Notes:
39   This prevents any shared memory allocated, but not deallocated, from remaining on the system and preventing
40   its future use.
41 
42   If the program crashes outstanding shared memory allocations may remain.
43 
44 .seealso: `PetscShmgetAllocateArray()`, `PetscShmgetDeallocateArray()`, `PetscShmgetUnmapAddresses()`
45 @*/
PetscShmgetAddressesFinalize(void)46 PetscErrorCode PetscShmgetAddressesFinalize(void)
47 {
48   PetscFunctionBegin;
49 #if defined(PETSC_HAVE_SHMGET)
50   PetscShmgetAllocation next = allocations, previous = NULL;
51 
52   while (next) {
53     PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory key %d shmid %d %s, see PCMPIServerBegin()", next->shmkey, next->shmid, strerror(errno));
54     previous = next;
55     next     = next->next;
56     PetscCall(PetscFree(previous));
57   }
58 #endif
59   PetscFunctionReturn(PETSC_SUCCESS);
60 }
61 
62 /* takes a void so can work bsan safe with PetscObjectContainerCompose() */
PCMPIServerAddressesDestroy(PetscCtxRt ctx)63 PetscErrorCode PCMPIServerAddressesDestroy(PetscCtxRt ctx)
64 {
65   PCMPIServerAddresses *addresses = *(PCMPIServerAddresses **)ctx;
66 
67   PetscFunctionBegin;
68 #if defined(PETSC_HAVE_SHMGET)
69   PetscCall(PetscShmgetUnmapAddresses(addresses->n, addresses->addr));
70   PetscCall(PetscFree(addresses));
71 #else
72   (void)addresses;
73 #endif
74   PetscFunctionReturn(PETSC_SUCCESS);
75 }
76 
77 /*@C
78   PetscShmgetMapAddresses - given shared address on the first MPI process determines the
79   addresses on the other MPI processes that map to the same physical memory
80 
81   Input Parameters:
82 + comm       - the `MPI_Comm` to scatter the address
83 . n          - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
84 - baseaddres - the addresses on the first MPI process, ignored on all but first process
85 
86   Output Parameter:
87 . addres - the addresses on each MPI process, the array of void * must already be allocated
88 
89   Level: developer
90 
91   Note:
92   This routine does nothing if `PETSC_HAVE_SHMGET` is not defined
93 
94 .seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetUnmapAddresses()`
95 @*/
PetscShmgetMapAddresses(MPI_Comm comm,PetscInt n,const void ** baseaddres,void ** addres)96 PetscErrorCode PetscShmgetMapAddresses(MPI_Comm comm, PetscInt n, const void **baseaddres, void **addres)
97 {
98   PetscFunctionBegin;
99 #if defined(PETSC_HAVE_SHMGET)
100   if (PetscGlobalRank == 0) {
101     BcastInfo bcastinfo = {
102       {0, 0, 0},
103       {0, 0, 0}
104     };
105     for (PetscInt i = 0; i < n; i++) {
106       PetscShmgetAllocation allocation = allocations;
107 
108       while (allocation) {
109         if (allocation->addr == baseaddres[i]) {
110           bcastinfo.shmkey[i] = allocation->shmkey;
111           bcastinfo.sz[i]     = allocation->sz;
112           addres[i]           = (void *)baseaddres[i];
113           break;
114         }
115         allocation = allocation->next;
116       }
117       PetscCheck(allocation, comm, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared address %p, see PCMPIServerBegin()", baseaddres[i]);
118     }
119     PetscCall(PetscInfo(NULL, "Mapping PCMPI Server array %p\n", addres[0]));
120     PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
121   } else {
122     BcastInfo bcastinfo = {
123       {0, 0, 0},
124       {0, 0, 0}
125     };
126     int    shmkey = 0;
127     size_t sz     = 0;
128 
129     PetscCallMPI(MPI_Bcast(&bcastinfo, 6, MPIU_SIZE_T, 0, comm));
130     for (PetscInt i = 0; i < n; i++) {
131       PetscShmgetAllocation next = allocations, previous = NULL;
132 
133       shmkey = (int)bcastinfo.shmkey[i];
134       sz     = bcastinfo.sz[i];
135       while (next) {
136         if (next->shmkey == shmkey) addres[i] = next->addr;
137         previous = next;
138         next     = next->next;
139       }
140       if (!next) {
141         PetscShmgetAllocation allocation;
142         PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
143         allocation->shmkey = shmkey;
144         allocation->sz     = sz;
145         allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666);
146         PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d of size %d, see PCMPIServerBegin()", allocation->shmkey, (int)allocation->sz);
147         allocation->addr = shmat(allocation->shmid, NULL, 0);
148         PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to map PCMPI shared memory key %d, see PCMPIServerBegin()", allocation->shmkey);
149         addres[i] = allocation->addr;
150         if (previous) previous->next = allocation;
151         else allocations = allocation;
152       }
153     }
154   }
155 #endif
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 /*@C
160   PetscShmgetUnmapAddresses - given shared addresses on a MPI process unlink it
161 
162   Input Parameters:
163 + n      - the number of addresses, each obtained on MPI process zero by `PetscShmgetAllocateArray()`
164 - addres - the addresses
165 
166   Level: developer
167 
168   Note:
169   This routine does nothing if `PETSC_HAVE_SHMGET` is not defined
170 
171 .seealso: `PetscShmgetDeallocateArray()`, `PetscShmgetAllocateArray()`, `PetscShmgetMapAddresses()`
172 @*/
PetscShmgetUnmapAddresses(PetscInt n,void ** addres)173 PetscErrorCode PetscShmgetUnmapAddresses(PetscInt n, void **addres) PeNS
174 {
175   PetscFunctionBegin;
176 #if defined(PETSC_HAVE_SHMGET)
177   if (PetscGlobalRank > 0) {
178     for (PetscInt i = 0; i < n; i++) {
179       PetscShmgetAllocation next = allocations, previous = NULL;
180       PetscBool             found = PETSC_FALSE;
181 
182       while (next) {
183         if (next->addr == addres[i]) {
184           PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
185           if (previous) previous->next = next->next;
186           else allocations = next->next;
187           PetscCall(PetscFree(next));
188           found = PETSC_TRUE;
189           break;
190         }
191         previous = next;
192         next     = next->next;
193       }
194       PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to find address %p to unmap, see PCMPIServerBegin()", addres[i]);
195     }
196   }
197 #endif
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
201 /*@C
202   PetscShmgetAllocateArray - allocates shared memory accessible by all MPI processes in the server
203 
204   Not Collective, only called on the first MPI process
205 
206   Input Parameters:
207 + sz  - the number of elements in the array
208 - asz - the size of an entry in the array, for example `sizeof(PetscScalar)`
209 
210   Output Parameters:
211 . addr - the address of the array
212 
213   Level: developer
214 
215   Notes:
216   Uses `PetscMalloc()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running
217 
218   Sometimes when a program crashes, shared memory IDs may remain, making it impossible to rerun the program.
219   Use
220 .vb
221   $PETSC_DIR/lib/petsc/bin/petscfreesharedmemory.sh
222 .ve to free that memory. The Linux command `ipcrm --all` or macOS command `for i in $(ipcs -m | tail -$(expr $(ipcs -m | wc -l) - 3) | tr -s ' ' | cut -d" " -f3); do ipcrm -M $i; done`
223   will also free the memory.
224 
225   Use the Unix command `ipcs -m` to see what memory IDs are currently allocated and `ipcrm -m ID` to remove a memory ID
226 
227   Under Apple macOS the following file must be copied to /Library/LaunchDaemons/sharedmemory.plist (ensure this file is owned by root and not the user)
228   and the machine rebooted before using shared memory
229 .vb
230 <?xml version="1.0" encoding="UTF-8"?>
231 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
232 <plist version="1.0">
233 <dict>
234  <key>Label</key>
235  <string>shmemsetup</string>
236  <key>UserName</key>
237  <string>root</string>
238  <key>GroupName</key>
239  <string>wheel</string>
240  <key>ProgramArguments</key>
241  <array>
242  <string>/usr/sbin/sysctl</string>
243  <string>-w</string>
244  <string>kern.sysv.shmmax=4194304000</string>
245  <string>kern.sysv.shmmni=2064</string>
246  <string>kern.sysv.shmseg=2064</string>
247  <string>kern.sysv.shmall=131072000</string>
248   </array>
249  <key>KeepAlive</key>
250  <false/>
251  <key>RunAtLoad</key>
252  <true/>
253 </dict>
254 </plist>
255 .ve
256 
257   Use the command
258 .vb
259   /usr/sbin/sysctl -a | grep shm
260 .ve
261   to confirm that the shared memory limits you have requested are available.
262 
263   Fortran Note:
264   The calling sequence is `PetscShmgetAllocateArray[Scalar,Int](PetscInt start, PetscInt len, Petsc[Scalar,Int], pointer :: d1(:), ierr)`
265 
266   Developer Note:
267   More specifically this uses `PetscMalloc()` if `!PCMPIServerUseShmget` || `!PCMPIServerActive` || `PCMPIServerInSolve`
268   where `PCMPIServerInSolve` indicates that the solve is nested inside a MPI linear solver server solve and hence should
269   not allocate the vector and matrix memory in shared memory.
270 
271 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetDeallocateArray()`
272 @*/
PetscShmgetAllocateArray(size_t sz,size_t asz,void * addr[])273 PetscErrorCode PetscShmgetAllocateArray(size_t sz, size_t asz, void *addr[])
274 {
275   PetscFunctionBegin;
276   if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscMalloc(sz * asz, addr));
277 #if defined(PETSC_HAVE_SHMGET)
278   else {
279     PetscShmgetAllocation allocation;
280     static int            shmkeys = 10;
281 
282     PetscCall(PetscCalloc(sizeof(struct _PetscShmgetAllocation), &allocation));
283     allocation->shmkey = shmkeys++;
284     allocation->sz     = sz * asz;
285     allocation->shmid  = shmget(allocation->shmkey, allocation->sz, 0666 | IPC_CREAT);
286     PetscCheck(allocation->shmid != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to schmget() of size %d with key %d %s see PetscShmgetAllocateArray()", (int)allocation->sz, allocation->shmkey, strerror(errno));
287     allocation->addr = shmat(allocation->shmid, NULL, 0);
288     PetscCheck(allocation->addr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unable to shmat() of shmid %d %s", allocation->shmid, strerror(errno));
289   #if PETSC_SIZEOF_VOID_P == 8
290     PetscCheck((uint64_t)allocation->addr != 0xffffffffffffffff, PETSC_COMM_SELF, PETSC_ERR_LIB, "shmat() of shmid %d returned 0xffffffffffffffff %s, see PCMPIServerBegin()", allocation->shmid, strerror(errno));
291   #endif
292 
293     if (!allocations) allocations = allocation;
294     else {
295       PetscShmgetAllocation next = allocations;
296       while (next->next) next = next->next;
297       next->next = allocation;
298     }
299     *addr = allocation->addr;
300     PetscCall(PetscInfo(NULL, "Allocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, allocation->shmkey, allocation->shmid, (int)allocation->sz));
301   }
302 #endif
303   PetscFunctionReturn(PETSC_SUCCESS);
304 }
305 
306 /*@C
307   PetscShmgetDeallocateArray - deallocates shared memory accessible by all MPI processes in the server
308 
309   Not Collective, only called on the first MPI process
310 
311   Input Parameter:
312 . addr - the address of array
313 
314   Level: developer
315 
316   Note:
317   Uses `PetscFree()` if `PETSC_HAVE_SHMGET` is not defined or the MPI linear solver server is not running
318 
319   Fortran Note:
320   The calling sequence is `PetscShmgetDeallocateArray[Scalar,Int](Petsc[Scalar,Int], pointer :: d1(:), ierr)`
321 
322 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`, `PetscShmgetAllocateArray()`
323 @*/
PetscShmgetDeallocateArray(void * addr[])324 PetscErrorCode PetscShmgetDeallocateArray(void *addr[])
325 {
326   PetscFunctionBegin;
327   if (!*addr) PetscFunctionReturn(PETSC_SUCCESS);
328   if (!PCMPIServerUseShmget || !PCMPIServerActive || PCMPIServerInSolve) PetscCall(PetscFree(*addr));
329 #if defined(PETSC_HAVE_SHMGET)
330   else {
331     PetscShmgetAllocation next = allocations, previous = NULL;
332 
333     while (next) {
334       if (next->addr == *addr) {
335         PetscCall(PetscInfo(NULL, "Deallocating PCMPI Server array %p shmkey %d shmid %d size %d\n", *addr, next->shmkey, next->shmid, (int)next->sz));
336         PetscCheck(!shmdt(next->addr), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to shmdt() location %s, see PCMPIServerBegin()", strerror(errno));
337         PetscCheck(!shmctl(next->shmid, IPC_RMID, NULL), PETSC_COMM_SELF, PETSC_ERR_SYS, "Unable to free shared memory addr %p key %d shmid %d %s, see PCMPIServerBegin()", *addr, next->shmkey, next->shmid, strerror(errno));
338         *addr = NULL;
339         if (previous) previous->next = next->next;
340         else allocations = next->next;
341         PetscCall(PetscFree(next));
342         PetscFunctionReturn(PETSC_SUCCESS);
343       }
344       previous = next;
345       next     = next->next;
346     }
347     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate PCMPI allocated shared memory address %p", *addr);
348   }
349 #endif
350   PetscFunctionReturn(PETSC_SUCCESS);
351 }
352 
353 #if defined(PETSC_USE_FORTRAN_BINDINGS)
354   #include <petsc/private/ftnimpl.h>
355 
356   #if defined(PETSC_HAVE_FORTRAN_CAPS)
357     #define petscshmgetallocatearrayscalar_   PETSCSHMGETALLOCATEARRAYSCALAR
358     #define petscshmgetdeallocatearrayscalar_ PETSCSHMGETDEALLOCATEARRAYSCALAR
359     #define petscshmgetallocatearrayint_      PETSCSHMGETALLOCATEARRAYINT
360     #define petscshmgetdeallocatearrayint_    PETSCSHMGETDEALLOCATEARRAYINT
361   #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
362     #define petscshmgetallocatearrayscalar_   petscshmgetallocatearrayscalar
363     #define petscshmgetdeallocatearrayscalar_ petscshmgetdeallocatearrayscalar
364     #define petscshmgetallocatearrayint_      petscshmgetallocatearrayint
365     #define petscshmgetdeallocatearrayint_    petscshmgetdeallocatearrayint
366   #endif
367 
petscshmgetallocatearrayscalar_(PetscInt * start,PetscInt * len,F90Array1d * a,PetscErrorCode * ierr PETSC_F90_2PTR_PROTO (ptrd))368 PETSC_EXTERN void petscshmgetallocatearrayscalar_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
369 {
370   PetscScalar *aa;
371 
372   *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscScalar), (void **)&aa);
373   if (*ierr) return;
374   *ierr = F90Array1dCreate(aa, MPIU_SCALAR, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
375 }
376 
petscshmgetdeallocatearrayscalar_(F90Array1d * a,PetscErrorCode * ierr PETSC_F90_2PTR_PROTO (ptrd))377 PETSC_EXTERN void petscshmgetdeallocatearrayscalar_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
378 {
379   PetscScalar *aa;
380 
381   *ierr = F90Array1dAccess(a, MPIU_SCALAR, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
382   if (*ierr) return;
383   *ierr = PetscShmgetDeallocateArray((void **)&aa);
384   if (*ierr) return;
385   *ierr = F90Array1dDestroy(a, MPIU_SCALAR PETSC_F90_2PTR_PARAM(ptrd));
386 }
387 
petscshmgetallocatearrayint_(PetscInt * start,PetscInt * len,F90Array1d * a,PetscErrorCode * ierr PETSC_F90_2PTR_PROTO (ptrd))388 PETSC_EXTERN void petscshmgetallocatearrayint_(PetscInt *start, PetscInt *len, F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
389 {
390   PetscInt *aa;
391 
392   *ierr = PetscShmgetAllocateArray(*len, sizeof(PetscInt), (void **)&aa);
393   if (*ierr) return;
394   *ierr = F90Array1dCreate(aa, MPIU_INT, *start, *len, a PETSC_F90_2PTR_PARAM(ptrd));
395 }
396 
petscshmgetdeallocatearrayint_(F90Array1d * a,PetscErrorCode * ierr PETSC_F90_2PTR_PROTO (ptrd))397 PETSC_EXTERN void petscshmgetdeallocatearrayint_(F90Array1d *a, PetscErrorCode *ierr PETSC_F90_2PTR_PROTO(ptrd))
398 {
399   PetscInt *aa;
400 
401   *ierr = F90Array1dAccess(a, MPIU_INT, (void **)&aa PETSC_F90_2PTR_PARAM(ptrd));
402   if (*ierr) return;
403   *ierr = PetscShmgetDeallocateArray((void **)&aa);
404   if (*ierr) return;
405   *ierr = F90Array1dDestroy(a, MPIU_INT PETSC_F90_2PTR_PARAM(ptrd));
406 }
407 
408 #endif
409