xref: /petsc/src/sys/error/signal.c (revision 607e733f3db3ee7f6f605a13295c517df8dbb9c9)
1 /*
2       Routines to handle signals the program will receive.
3     Usually this will call the error handlers.
4 */
5 #include <petsc/private/petscimpl.h> /*I   "petscsys.h"   I*/
6 #include <signal.h>
7 #include <stdlib.h> /* for _Exit() */
8 
9 static PetscClassId SIGNAL_CLASSID = 0;
10 
11 struct SH {
12   PetscClassId classid;
13   PetscErrorCode (*handler)(int, void *);
14   void      *ctx;
15   struct SH *previous;
16 };
17 static struct SH *sh        = NULL;
18 static PetscBool  SignalSet = PETSC_FALSE;
19 
20 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
21    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
22 */
23 static void MyExit(void)
24 {
25   _Exit(MPI_ERR_OTHER);
26 }
27 
28 /*
29     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
30              any signal handler set by PETSc or the application code.
31 
32    Input Parameters: (depends on system)
33 .    sig - integer code indicating the type of signal
34 .    code - ??
35 .    sigcontext - ??
36 .    addr - ??
37 
38 */
39 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
40 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
41 #else
42 static void PetscSignalHandler_Private(int sig)
43 #endif
44 {
45   PetscErrorCode ierr;
46 
47   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, NULL);
48   else {
49     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
50     ierr = (*sh->handler)(sig, sh->ctx);
51   }
52   if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
53 }
54 
55 /*@
56   PetscSignalHandlerDefault - Default signal handler.
57 
58   Not Collective
59 
60   Input Parameters:
61 + sig - signal value
62 - ptr - unused pointer
63 
64   Level: advanced
65 
66   Developer Note:
67   This does not call `PetscError()`, it handles the entire error process, including possibly printing the traceback, directly
68 
69 .seealso: [](sec_errors), `PetscPushSignalHandler()`
70 @*/
71 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr)
72 {
73   const char *SIGNAME[64];
74 
75   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
76   SIGNAME[0] = "Unknown signal";
77 #if !defined(PETSC_MISSING_SIGABRT)
78   SIGNAME[SIGABRT] = "Abort";
79 #endif
80 #if !defined(PETSC_MISSING_SIGALRM)
81   SIGNAME[SIGALRM] = "Alarm";
82 #endif
83 #if !defined(PETSC_MISSING_SIGBUS)
84   SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
85 #endif
86 #if !defined(PETSC_MISSING_SIGCHLD)
87   SIGNAME[SIGCHLD] = "CHLD";
88 #endif
89 #if !defined(PETSC_MISSING_SIGCONT)
90   SIGNAME[SIGCONT] = "CONT";
91 #endif
92 #if !defined(PETSC_MISSING_SIGFPE)
93   SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
94 #endif
95 #if !defined(PETSC_MISSING_SIGHUP)
96   SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
97 #endif
98 #if !defined(PETSC_MISSING_SIGILL)
99   SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
100 #endif
101 #if !defined(PETSC_MISSING_SIGINT)
102   SIGNAME[SIGINT] = "Interrupt";
103 #endif
104 #if !defined(PETSC_MISSING_SIGKILL)
105   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
106 #endif
107 #if !defined(PETSC_MISSING_SIGPIPE)
108   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
109 #endif
110 #if !defined(PETSC_MISSING_SIGQUIT)
111   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
112 #endif
113 #if !defined(PETSC_MISSING_SIGSEGV)
114   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
115 #endif
116 #if !defined(PETSC_MISSING_SIGSYS)
117   SIGNAME[SIGSYS] = "SYS";
118 #endif
119 #if !defined(PETSC_MISSING_SIGTERM)
120   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
121 #endif
122 #if !defined(PETSC_MISSING_SIGTRAP)
123   SIGNAME[SIGTRAP] = "TRAP";
124 #endif
125 #if !defined(PETSC_MISSING_SIGTSTP)
126   SIGNAME[SIGTSTP] = "TSTP";
127 #endif
128 #if !defined(PETSC_MISSING_SIGURG)
129   SIGNAME[SIGURG] = "URG";
130 #endif
131 #if !defined(PETSC_MISSING_SIGUSR1)
132   SIGNAME[SIGUSR1] = "User 1";
133 #endif
134 #if !defined(PETSC_MISSING_SIGUSR2)
135   SIGNAME[SIGUSR2] = "User 2";
136 #endif
137 
138   signal(sig, SIG_DFL);
139   (void)PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
140   (void)(*PetscErrorPrintf)("------------------------------------------------------------------------\n");
141   if (sig >= 0 && sig <= 20) (void)(*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
142   else (void)(*PetscErrorPrintf)("Caught signal\n");
143 
144   (void)(*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
145   (void)(*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
146 #if defined(PETSC_HAVE_CUDA)
147   (void)(*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
148 #endif
149 #if PetscDefined(USE_DEBUG)
150   #if !PetscDefined(HAVE_THREADSAFETY)
151   (void)(*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
152   (void)PetscStackView(PETSC_STDOUT);
153   #endif
154 #else
155   (void)(*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
156   (void)(*PetscErrorPrintf)("to get more information on the crash.\n");
157 #endif
158 #if !defined(PETSC_MISSING_SIGBUS)
159   if (sig == SIGSEGV || sig == SIGBUS) {
160 #else
161   if (sig == SIGSEGV) {
162 #endif
163     PetscBool debug;
164 
165     (void)PetscMallocGetDebug(&debug, NULL, NULL);
166     if (debug) (void)PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
167     else (void)(*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
168   }
169   atexit(MyExit);
170   PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_SIG);
171   return PETSC_SUCCESS;
172 }
173 
/* PETSC_SIGNAL_CAST is written in front of the handler argument of the signal() calls in this file;
   unless it has already been defined it expands to nothing */
#ifndef PETSC_SIGNAL_CAST
  #define PETSC_SIGNAL_CAST
#endif
177 
178 /*@C
179   PetscPushSignalHandler - Catches the usual fatal errors and
180   calls a user-provided routine.
181 
182   Not Collective, No Fortran Support
183 
184   Input Parameters:
185 + routine - routine to call when a signal is received
186 - ctx     - optional context needed by the routine
187 
188   Level: developer
189 
190   Note:
191   There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by
192   the loader. That information is lost with the first call to `PetscPushSignalHandler()`
193 
194 .seealso: [](sec_errors), `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
195 @*/
196 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), PetscCtx ctx)
197 {
198   struct SH *newsh;
199 
200   PetscFunctionBegin;
201   if (!SIGNAL_CLASSID) {
202     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
203     SIGNAL_CLASSID = 19;
204   }
205   if (!SignalSet && routine) {
206     /* Do not catch ABRT, CHLD, KILL */
207 #if !defined(PETSC_MISSING_SIGALRM)
208     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
209 #endif
210 #if !defined(PETSC_MISSING_SIGBUS)
211     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
212 #endif
213 #if !defined(PETSC_MISSING_SIGCONT)
214     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
215 #endif
216 #if !defined(PETSC_MISSING_SIGFPE)
217     signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
218 #endif
219 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
220     {
221       struct sigaction action;
222       sigaction(SIGHUP, NULL, &action);
223       if (action.sa_handler == SIG_IGN) {
224         PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefore not changing its signal handler\n"));
225       } else {
226         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
227       }
228     }
229 #endif
230 #if !defined(PETSC_MISSING_SIGILL)
231     signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
232 #endif
233 #if !defined(PETSC_MISSING_SIGINT)
234     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
235 #endif
236 #if !defined(PETSC_MISSING_SIGPIPE)
237     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
238 #endif
239 #if !defined(PETSC_MISSING_SIGQUIT)
240     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
241 #endif
242 #if !defined(PETSC_MISSING_SIGSEGV)
243     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
244 #endif
245 #if !defined(PETSC_MISSING_SIGSYS)
246     signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
247 #endif
248 #if !defined(PETSC_MISSING_SIGTERM)
249   #if !defined(PETSC_HAVE_OPENMPI)
250     /* Open MPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
251     signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
252   #endif
253 #endif
254 #if !defined(PETSC_MISSING_SIGTRAP)
255     signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
256 #endif
257 #if !defined(PETSC_MISSING_SIGTSTP)
258     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
259 #endif
260 #if !defined(PETSC_MISSING_SIGURG)
261     signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
262 #endif
263 #if !defined(PETSC_MISSING_SIGUSR1)
264     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
265 #endif
266 #if !defined(PETSC_MISSING_SIGUSR2)
267     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
268 #endif
269     SignalSet = PETSC_TRUE;
270   }
271   if (!routine) {
272 #if !defined(PETSC_MISSING_SIGALRM)
273     /* signal(SIGALRM, SIG_DFL); */
274 #endif
275 #if !defined(PETSC_MISSING_SIGBUS)
276     signal(SIGBUS, SIG_DFL);
277 #endif
278 #if !defined(PETSC_MISSING_SIGCONT)
279     /* signal(SIGCONT, SIG_DFL); */
280 #endif
281 #if !defined(PETSC_MISSING_SIGFPE)
282     signal(SIGFPE, SIG_DFL);
283 #endif
284 #if !defined(PETSC_MISSING_SIGHUP)
285     signal(SIGHUP, SIG_DFL);
286 #endif
287 #if !defined(PETSC_MISSING_SIGILL)
288     signal(SIGILL, SIG_DFL);
289 #endif
290 #if !defined(PETSC_MISSING_SIGINT)
291     /* signal(SIGINT,  SIG_DFL); */
292 #endif
293 #if !defined(PETSC_MISSING_SIGPIPE)
294     signal(SIGPIPE, SIG_DFL);
295 #endif
296 #if !defined(PETSC_MISSING_SIGQUIT)
297     signal(SIGQUIT, SIG_DFL);
298 #endif
299 #if !defined(PETSC_MISSING_SIGSEGV)
300     signal(SIGSEGV, SIG_DFL);
301 #endif
302 #if !defined(PETSC_MISSING_SIGSYS)
303     signal(SIGSYS, SIG_DFL);
304 #endif
305 #if !defined(PETSC_MISSING_SIGTERM)
306     signal(SIGTERM, SIG_DFL);
307 #endif
308 #if !defined(PETSC_MISSING_SIGTRAP)
309     signal(SIGTRAP, SIG_DFL);
310 #endif
311 #if !defined(PETSC_MISSING_SIGTSTP)
312     /* signal(SIGTSTP, SIG_DFL); */
313 #endif
314 #if !defined(PETSC_MISSING_SIGURG)
315     signal(SIGURG, SIG_DFL);
316 #endif
317 #if !defined(PETSC_MISSING_SIGUSR1)
318     /* signal(SIGUSR1, SIG_DFL); */
319 #endif
320 #if !defined(PETSC_MISSING_SIGUSR2)
321     /* signal(SIGUSR2, SIG_DFL); */
322 #endif
323     SignalSet = PETSC_FALSE;
324   }
325   PetscCall(PetscNew(&newsh));
326   if (sh) {
327     PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
328     newsh->previous = sh;
329   } else newsh->previous = NULL;
330   newsh->handler = routine;
331   newsh->ctx     = ctx;
332   newsh->classid = SIGNAL_CLASSID;
333   sh             = newsh;
334   PetscFunctionReturn(PETSC_SUCCESS);
335 }
336 
337 /*@
338   PetscPopSignalHandler - Removes the last signal handler that was pushed.
339   If no signal handlers are left on the stack it will remove the PETSc signal handler.
340   (That is PETSc will no longer catch signals).
341 
342   Not Collective
343 
344   Level: developer
345 
346   Note:
347   There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by
348   the loader. That information is lost with the first call to `PetscPushSignalHandler()`
349 
350 .seealso: [](sec_errors), `PetscPushSignalHandler()`
351 @*/
352 PetscErrorCode PetscPopSignalHandler(void)
353 {
354   struct SH *tmp;
355 
356   PetscFunctionBegin;
357   if (!sh) PetscFunctionReturn(PETSC_SUCCESS);
358   PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
359 
360   tmp = sh;
361   sh  = sh->previous;
362   PetscCall(PetscFree(tmp));
363   if (!sh || !sh->handler) {
364 #if !defined(PETSC_MISSING_SIGALRM)
365     /* signal(SIGALRM, SIG_DFL); */
366 #endif
367 #if !defined(PETSC_MISSING_SIGBUS)
368     signal(SIGBUS, SIG_DFL);
369 #endif
370 #if !defined(PETSC_MISSING_SIGCONT)
371     /* signal(SIGCONT, SIG_DFL); */
372 #endif
373 #if !defined(PETSC_MISSING_SIGFPE)
374     signal(SIGFPE, SIG_DFL);
375 #endif
376 #if !defined(PETSC_MISSING_SIGHUP)
377     signal(SIGHUP, SIG_DFL);
378 #endif
379 #if !defined(PETSC_MISSING_SIGILL)
380     signal(SIGILL, SIG_DFL);
381 #endif
382 #if !defined(PETSC_MISSING_SIGINT)
383     /* signal(SIGINT,  SIG_DFL); */
384 #endif
385 #if !defined(PETSC_MISSING_SIGPIPE)
386     signal(SIGPIPE, SIG_DFL);
387 #endif
388 #if !defined(PETSC_MISSING_SIGQUIT)
389     signal(SIGQUIT, SIG_DFL);
390 #endif
391 #if !defined(PETSC_MISSING_SIGSEGV)
392     signal(SIGSEGV, SIG_DFL);
393 #endif
394 #if !defined(PETSC_MISSING_SIGSYS)
395     signal(SIGSYS, SIG_DFL);
396 #endif
397 #if !defined(PETSC_MISSING_SIGTERM)
398     signal(SIGTERM, SIG_DFL);
399 #endif
400 #if !defined(PETSC_MISSING_SIGTRAP)
401     signal(SIGTRAP, SIG_DFL);
402 #endif
403 #if !defined(PETSC_MISSING_SIGTSTP)
404     /* signal(SIGTSTP, SIG_DFL); */
405 #endif
406 #if !defined(PETSC_MISSING_SIGURG)
407     signal(SIGURG, SIG_DFL);
408 #endif
409 #if !defined(PETSC_MISSING_SIGUSR1)
410     /* signal(SIGUSR1, SIG_DFL); */
411 #endif
412 #if !defined(PETSC_MISSING_SIGUSR2)
413     /* signal(SIGUSR2, SIG_DFL); */
414 #endif
415     SignalSet = PETSC_FALSE;
416   } else {
417     SignalSet = PETSC_TRUE;
418   }
419   PetscFunctionReturn(PETSC_SUCCESS);
420 }
421