xref: /petsc/src/sys/error/signal.c (revision 16a9b8de2019c191953d52ed8be9fc0021dcda4c)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h> /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId classid;
14   PetscErrorCode (*handler)(int, void *);
15   void      *ctx;
16   struct SH *previous;
17 };
18 static struct SH *sh        = NULL;
19 static PetscBool  SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0);
49   else {
50     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
51     ierr = (*sh->handler)(sig, sh->ctx);
52   }
53   if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
54 }
55 
56 /*@
57   PetscSignalHandlerDefault - Default signal handler.
58 
59   Not Collective
60 
61   Input Parameters:
62 + sig - signal value
63 - ptr - unused pointer
64 
65   Level: advanced
66 
67   Developer Notes:
68   This does not call `PetscError()`, handles the entire error process directly
69 
70 .seealso: `PetscPushSignalHandler()`
71 @*/
72 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr)
73 {
74   PetscErrorCode ierr;
75   const char    *SIGNAME[64];
76 
77   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
78   SIGNAME[0] = "Unknown signal";
79 #if !defined(PETSC_MISSING_SIGABRT)
80   SIGNAME[SIGABRT] = "Abort";
81 #endif
82 #if !defined(PETSC_MISSING_SIGALRM)
83   SIGNAME[SIGALRM] = "Alarm";
84 #endif
85 #if !defined(PETSC_MISSING_SIGBUS)
86   SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
87 #endif
88 #if !defined(PETSC_MISSING_SIGCHLD)
89   SIGNAME[SIGCHLD] = "CHLD";
90 #endif
91 #if !defined(PETSC_MISSING_SIGCONT)
92   SIGNAME[SIGCONT] = "CONT";
93 #endif
94 #if !defined(PETSC_MISSING_SIGFPE)
95   SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
96 #endif
97 #if !defined(PETSC_MISSING_SIGHUP)
98   SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
99 #endif
100 #if !defined(PETSC_MISSING_SIGILL)
101   SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
102 #endif
103 #if !defined(PETSC_MISSING_SIGINT)
104   SIGNAME[SIGINT] = "Interrupt";
105 #endif
106 #if !defined(PETSC_MISSING_SIGKILL)
107   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
108 #endif
109 #if !defined(PETSC_MISSING_SIGPIPE)
110   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
111 #endif
112 #if !defined(PETSC_MISSING_SIGQUIT)
113   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
114 #endif
115 #if !defined(PETSC_MISSING_SIGSEGV)
116   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
117 #endif
118 #if !defined(PETSC_MISSING_SIGSYS)
119   SIGNAME[SIGSYS] = "SYS";
120 #endif
121 #if !defined(PETSC_MISSING_SIGTERM)
122   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
123 #endif
124 #if !defined(PETSC_MISSING_SIGTRAP)
125   SIGNAME[SIGTRAP] = "TRAP";
126 #endif
127 #if !defined(PETSC_MISSING_SIGTSTP)
128   SIGNAME[SIGTSTP] = "TSTP";
129 #endif
130 #if !defined(PETSC_MISSING_SIGURG)
131   SIGNAME[SIGURG] = "URG";
132 #endif
133 #if !defined(PETSC_MISSING_SIGUSR1)
134   SIGNAME[SIGUSR1] = "User 1";
135 #endif
136 #if !defined(PETSC_MISSING_SIGUSR2)
137   SIGNAME[SIGUSR2] = "User 2";
138 #endif
139 
140   signal(sig, SIG_DFL);
141   ierr = PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
142   ierr = (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
143   if (sig >= 0 && sig <= 20) ierr = (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
144   else ierr = (*PetscErrorPrintf)("Caught signal\n");
145 
146   ierr = (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
147   ierr = (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
148 #if defined(PETSC_HAVE_CUDA)
149   ierr = (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
150 #endif
151 #if PetscDefined(USE_DEBUG)
152   #if !PetscDefined(HAVE_THREADSAFETY)
153   ierr = (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
154   ierr = PetscStackView(PETSC_STDOUT);
155   #endif
156 #else
157   ierr = (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
158   ierr = (*PetscErrorPrintf)("to get more information on the crash.\n");
159 #endif
160 #if !defined(PETSC_MISSING_SIGBUS)
161   if (sig == SIGSEGV || sig == SIGBUS) {
162 #else
163   if (sig == SIGSEGV) {
164 #endif
165     PetscBool debug;
166 
167     ierr = PetscMallocGetDebug(&debug, NULL, NULL);
168     if (debug) ierr = PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
169     else ierr = (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
170   }
171   atexit(MyExit);
172   (void)ierr;
173   PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_SIG);
174   return PETSC_SUCCESS;
175 }
176 
177 #if !defined(PETSC_SIGNAL_CAST)
178   #define PETSC_SIGNAL_CAST
179 #endif
180 
181 /*@C
182   PetscPushSignalHandler - Catches the usual fatal errors and
183   calls a user-provided routine.
184 
185   Not Collective
186 
187   Input Parameters:
188 + routine - routine to call when a signal is received
189 - ctx     - optional context needed by the routine
190 
191   Level: developer
192 
193   Note:
194   There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by
195   the loader. That information is lost with the first call to `PetscPushSignalHandler()`
196 
197 .seealso: [](sec_errors), `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
198 @*/
199 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx)
200 {
201   struct SH *newsh;
202 
203   PetscFunctionBegin;
204   if (!SIGNAL_CLASSID) {
205     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
206     SIGNAL_CLASSID = 19;
207   }
208   if (!SignalSet && routine) {
209     /* Do not catch ABRT, CHLD, KILL */
210 #if !defined(PETSC_MISSING_SIGALRM)
211     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
212 #endif
213 #if !defined(PETSC_MISSING_SIGBUS)
214     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
215 #endif
216 #if !defined(PETSC_MISSING_SIGCONT)
217     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
218 #endif
219 #if !defined(PETSC_MISSING_SIGFPE)
220     signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
221 #endif
222 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
223     {
224       struct sigaction action;
225       sigaction(SIGHUP, NULL, &action);
226       if (action.sa_handler == SIG_IGN) {
227         PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefore not changing its signal handler\n"));
228       } else {
229         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
230       }
231     }
232 #endif
233 #if !defined(PETSC_MISSING_SIGILL)
234     signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
235 #endif
236 #if !defined(PETSC_MISSING_SIGINT)
237     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
238 #endif
239 #if !defined(PETSC_MISSING_SIGPIPE)
240     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
241 #endif
242 #if !defined(PETSC_MISSING_SIGQUIT)
243     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
244 #endif
245 #if !defined(PETSC_MISSING_SIGSEGV)
246     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
247 #endif
248 #if !defined(PETSC_MISSING_SIGSYS)
249     signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
250 #endif
251 #if !defined(PETSC_MISSING_SIGTERM)
252   #if !defined(OMPI_MAJOR_VERSION)
253     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
254     signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
255   #endif
256 #endif
257 #if !defined(PETSC_MISSING_SIGTRAP)
258     signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
259 #endif
260 #if !defined(PETSC_MISSING_SIGTSTP)
261     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
262 #endif
263 #if !defined(PETSC_MISSING_SIGURG)
264     signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
265 #endif
266 #if !defined(PETSC_MISSING_SIGUSR1)
267     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
268 #endif
269 #if !defined(PETSC_MISSING_SIGUSR2)
270     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
271 #endif
272     SignalSet = PETSC_TRUE;
273   }
274   if (!routine) {
275 #if !defined(PETSC_MISSING_SIGALRM)
276     /* signal(SIGALRM, SIG_DFL); */
277 #endif
278 #if !defined(PETSC_MISSING_SIGBUS)
279     signal(SIGBUS, SIG_DFL);
280 #endif
281 #if !defined(PETSC_MISSING_SIGCONT)
282     /* signal(SIGCONT, SIG_DFL); */
283 #endif
284 #if !defined(PETSC_MISSING_SIGFPE)
285     signal(SIGFPE, SIG_DFL);
286 #endif
287 #if !defined(PETSC_MISSING_SIGHUP)
288     signal(SIGHUP, SIG_DFL);
289 #endif
290 #if !defined(PETSC_MISSING_SIGILL)
291     signal(SIGILL, SIG_DFL);
292 #endif
293 #if !defined(PETSC_MISSING_SIGINT)
294     /* signal(SIGINT,  SIG_DFL); */
295 #endif
296 #if !defined(PETSC_MISSING_SIGPIPE)
297     signal(SIGPIPE, SIG_DFL);
298 #endif
299 #if !defined(PETSC_MISSING_SIGQUIT)
300     signal(SIGQUIT, SIG_DFL);
301 #endif
302 #if !defined(PETSC_MISSING_SIGSEGV)
303     signal(SIGSEGV, SIG_DFL);
304 #endif
305 #if !defined(PETSC_MISSING_SIGSYS)
306     signal(SIGSYS, SIG_DFL);
307 #endif
308 #if !defined(PETSC_MISSING_SIGTERM)
309     signal(SIGTERM, SIG_DFL);
310 #endif
311 #if !defined(PETSC_MISSING_SIGTRAP)
312     signal(SIGTRAP, SIG_DFL);
313 #endif
314 #if !defined(PETSC_MISSING_SIGTSTP)
315     /* signal(SIGTSTP, SIG_DFL); */
316 #endif
317 #if !defined(PETSC_MISSING_SIGURG)
318     signal(SIGURG, SIG_DFL);
319 #endif
320 #if !defined(PETSC_MISSING_SIGUSR1)
321     /* signal(SIGUSR1, SIG_DFL); */
322 #endif
323 #if !defined(PETSC_MISSING_SIGUSR2)
324     /* signal(SIGUSR2, SIG_DFL); */
325 #endif
326     SignalSet = PETSC_FALSE;
327   }
328   PetscCall(PetscNew(&newsh));
329   if (sh) {
330     PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
331     newsh->previous = sh;
332   } else newsh->previous = NULL;
333   newsh->handler = routine;
334   newsh->ctx     = ctx;
335   newsh->classid = SIGNAL_CLASSID;
336   sh             = newsh;
337   PetscFunctionReturn(PETSC_SUCCESS);
338 }
339 
340 /*@
341   PetscPopSignalHandler - Removes the last signal handler that was pushed.
342   If no signal handlers are left on the stack it will remove the PETSc signal handler.
343   (That is PETSc will no longer catch signals).
344 
345   Not Collective
346 
347   Level: developer
348 
349   Note:
350   There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by
351   the loader. That information is lost with the first call to `PetscPushSignalHandler()`
352 
353 .seealso: [](sec_errors), `PetscPushSignalHandler()`
354 @*/
355 PetscErrorCode PetscPopSignalHandler(void)
356 {
357   struct SH *tmp;
358 
359   PetscFunctionBegin;
360   if (!sh) PetscFunctionReturn(PETSC_SUCCESS);
361   PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
362 
363   tmp = sh;
364   sh  = sh->previous;
365   PetscCall(PetscFree(tmp));
366   if (!sh || !sh->handler) {
367 #if !defined(PETSC_MISSING_SIGALRM)
368     /* signal(SIGALRM, SIG_DFL); */
369 #endif
370 #if !defined(PETSC_MISSING_SIGBUS)
371     signal(SIGBUS, SIG_DFL);
372 #endif
373 #if !defined(PETSC_MISSING_SIGCONT)
374     /* signal(SIGCONT, SIG_DFL); */
375 #endif
376 #if !defined(PETSC_MISSING_SIGFPE)
377     signal(SIGFPE, SIG_DFL);
378 #endif
379 #if !defined(PETSC_MISSING_SIGHUP)
380     signal(SIGHUP, SIG_DFL);
381 #endif
382 #if !defined(PETSC_MISSING_SIGILL)
383     signal(SIGILL, SIG_DFL);
384 #endif
385 #if !defined(PETSC_MISSING_SIGINT)
386     /* signal(SIGINT,  SIG_DFL); */
387 #endif
388 #if !defined(PETSC_MISSING_SIGPIPE)
389     signal(SIGPIPE, SIG_DFL);
390 #endif
391 #if !defined(PETSC_MISSING_SIGQUIT)
392     signal(SIGQUIT, SIG_DFL);
393 #endif
394 #if !defined(PETSC_MISSING_SIGSEGV)
395     signal(SIGSEGV, SIG_DFL);
396 #endif
397 #if !defined(PETSC_MISSING_SIGSYS)
398     signal(SIGSYS, SIG_DFL);
399 #endif
400 #if !defined(PETSC_MISSING_SIGTERM)
401     signal(SIGTERM, SIG_DFL);
402 #endif
403 #if !defined(PETSC_MISSING_SIGTRAP)
404     signal(SIGTRAP, SIG_DFL);
405 #endif
406 #if !defined(PETSC_MISSING_SIGTSTP)
407     /* signal(SIGTSTP, SIG_DFL); */
408 #endif
409 #if !defined(PETSC_MISSING_SIGURG)
410     signal(SIGURG, SIG_DFL);
411 #endif
412 #if !defined(PETSC_MISSING_SIGUSR1)
413     /* signal(SIGUSR1, SIG_DFL); */
414 #endif
415 #if !defined(PETSC_MISSING_SIGUSR2)
416     /* signal(SIGUSR2, SIG_DFL); */
417 #endif
418     SignalSet = PETSC_FALSE;
419   } else {
420     SignalSet = PETSC_TRUE;
421   }
422   PetscFunctionReturn(PETSC_SUCCESS);
423 }
424