xref: /petsc/src/sys/error/signal.c (revision 98d129c30f3ee9fdddc40fdbc5a989b7be64f888)
1 /*
2       Routines to handle signals the program will receive.
3     Usually this will call the error handlers.
4 */
5 #include <petsc/private/petscimpl.h> /*I   "petscsys.h"   I*/
6 #include <signal.h>
7 #include <stdlib.h> /* for _Exit() */
8 
/* Tag written into every struct SH and compared on use to detect a corrupted entry; stays 0 until PetscPushSignalHandler() assigns it */
static PetscClassId SIGNAL_CLASSID = 0;

/* One entry of the stack of pushed signal handlers */
struct SH {
  PetscClassId classid;                   /* must equal SIGNAL_CLASSID, otherwise the entry is reported as corrupted */
  PetscErrorCode (*handler)(int, void *); /* routine called with (signal number, ctx); may be NULL */
  void      *ctx;                         /* context passed back to handler */
  struct SH *previous;                    /* entry that was on top before this one was pushed, or NULL */
};
static struct SH *sh        = NULL;        /* top of the handler stack, NULL when nothing has been pushed */
static PetscBool  SignalSet = PETSC_FALSE; /* PETSC_TRUE after PetscPushSignalHandler() has installed PetscSignalHandler_Private() */
19 
20 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
21    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
22 */
23 static void MyExit(void)
24 {
25   _Exit(MPI_ERR_OTHER);
26 }
27 
28 /*
29     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
30              any signal handler set by PETSc or the application code.
31 
32    Input Parameters: (depends on system)
33 .    sig - integer code indicating the type of signal
34 .    code - ??
35 .    sigcontext - ??
36 .    addr - ??
37 
38 */
39 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
40 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
41 #else
42 static void PetscSignalHandler_Private(int sig)
43 #endif
44 {
45   PetscErrorCode ierr;
46 
47   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0);
48   else {
49     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
50     ierr = (*sh->handler)(sig, sh->ctx);
51   }
52   if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
53 }
54 
55 /*@
56   PetscSignalHandlerDefault - Default signal handler.
57 
58   Not Collective
59 
60   Input Parameters:
61 + sig - signal value
62 - ptr - unused pointer
63 
64   Level: advanced
65 
66   Developer Note:
67   This does not call `PetscError()`, it handles the entire error process, including possibly printing the traceback, directly
68 
69 .seealso: [](sec_errors), `PetscPushSignalHandler()`
70 @*/
71 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr)
72 {
73   PetscErrorCode ierr;
74   const char    *SIGNAME[64];
75 
76   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
77   SIGNAME[0] = "Unknown signal";
78 #if !defined(PETSC_MISSING_SIGABRT)
79   SIGNAME[SIGABRT] = "Abort";
80 #endif
81 #if !defined(PETSC_MISSING_SIGALRM)
82   SIGNAME[SIGALRM] = "Alarm";
83 #endif
84 #if !defined(PETSC_MISSING_SIGBUS)
85   SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
86 #endif
87 #if !defined(PETSC_MISSING_SIGCHLD)
88   SIGNAME[SIGCHLD] = "CHLD";
89 #endif
90 #if !defined(PETSC_MISSING_SIGCONT)
91   SIGNAME[SIGCONT] = "CONT";
92 #endif
93 #if !defined(PETSC_MISSING_SIGFPE)
94   SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
95 #endif
96 #if !defined(PETSC_MISSING_SIGHUP)
97   SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
98 #endif
99 #if !defined(PETSC_MISSING_SIGILL)
100   SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
101 #endif
102 #if !defined(PETSC_MISSING_SIGINT)
103   SIGNAME[SIGINT] = "Interrupt";
104 #endif
105 #if !defined(PETSC_MISSING_SIGKILL)
106   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
107 #endif
108 #if !defined(PETSC_MISSING_SIGPIPE)
109   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
110 #endif
111 #if !defined(PETSC_MISSING_SIGQUIT)
112   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
113 #endif
114 #if !defined(PETSC_MISSING_SIGSEGV)
115   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
116 #endif
117 #if !defined(PETSC_MISSING_SIGSYS)
118   SIGNAME[SIGSYS] = "SYS";
119 #endif
120 #if !defined(PETSC_MISSING_SIGTERM)
121   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
122 #endif
123 #if !defined(PETSC_MISSING_SIGTRAP)
124   SIGNAME[SIGTRAP] = "TRAP";
125 #endif
126 #if !defined(PETSC_MISSING_SIGTSTP)
127   SIGNAME[SIGTSTP] = "TSTP";
128 #endif
129 #if !defined(PETSC_MISSING_SIGURG)
130   SIGNAME[SIGURG] = "URG";
131 #endif
132 #if !defined(PETSC_MISSING_SIGUSR1)
133   SIGNAME[SIGUSR1] = "User 1";
134 #endif
135 #if !defined(PETSC_MISSING_SIGUSR2)
136   SIGNAME[SIGUSR2] = "User 2";
137 #endif
138 
139   signal(sig, SIG_DFL);
140   ierr = PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
141   ierr = (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
142   if (sig >= 0 && sig <= 20) ierr = (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
143   else ierr = (*PetscErrorPrintf)("Caught signal\n");
144 
145   ierr = (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
146   ierr = (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
147 #if defined(PETSC_HAVE_CUDA)
148   ierr = (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
149 #endif
150 #if PetscDefined(USE_DEBUG)
151   #if !PetscDefined(HAVE_THREADSAFETY)
152   ierr = (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
153   ierr = PetscStackView(PETSC_STDOUT);
154   #endif
155 #else
156   ierr = (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
157   ierr = (*PetscErrorPrintf)("to get more information on the crash.\n");
158 #endif
159 #if !defined(PETSC_MISSING_SIGBUS)
160   if (sig == SIGSEGV || sig == SIGBUS) {
161 #else
162   if (sig == SIGSEGV) {
163 #endif
164     PetscBool debug;
165 
166     ierr = PetscMallocGetDebug(&debug, NULL, NULL);
167     if (debug) ierr = PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
168     else ierr = (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
169   }
170   atexit(MyExit);
171   (void)ierr;
172   PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_SIG);
173   return PETSC_SUCCESS;
174 }
175 
/* PETSC_SIGNAL_CAST is written in front of the handler argument in the signal() calls below;
   it expands to nothing unless it has already been defined before this point */
#if !defined(PETSC_SIGNAL_CAST)
  #define PETSC_SIGNAL_CAST
#endif
179 
180 /*@C
181   PetscPushSignalHandler - Catches the usual fatal errors and
182   calls a user-provided routine.
183 
184   Not Collective
185 
186   Input Parameters:
187 + routine - routine to call when a signal is received
188 - ctx     - optional context needed by the routine
189 
190   Level: developer
191 
192   Note:
193   There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by
194   the loader. That information is lost with the first call to `PetscPushSignalHandler()`
195 
196 .seealso: [](sec_errors), `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
197 @*/
198 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx)
199 {
200   struct SH *newsh;
201 
202   PetscFunctionBegin;
203   if (!SIGNAL_CLASSID) {
204     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
205     SIGNAL_CLASSID = 19;
206   }
207   if (!SignalSet && routine) {
208     /* Do not catch ABRT, CHLD, KILL */
209 #if !defined(PETSC_MISSING_SIGALRM)
210     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
211 #endif
212 #if !defined(PETSC_MISSING_SIGBUS)
213     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
214 #endif
215 #if !defined(PETSC_MISSING_SIGCONT)
216     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
217 #endif
218 #if !defined(PETSC_MISSING_SIGFPE)
219     signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
220 #endif
221 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
222     {
223       struct sigaction action;
224       sigaction(SIGHUP, NULL, &action);
225       if (action.sa_handler == SIG_IGN) {
226         PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefore not changing its signal handler\n"));
227       } else {
228         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
229       }
230     }
231 #endif
232 #if !defined(PETSC_MISSING_SIGILL)
233     signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
234 #endif
235 #if !defined(PETSC_MISSING_SIGINT)
236     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
237 #endif
238 #if !defined(PETSC_MISSING_SIGPIPE)
239     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
240 #endif
241 #if !defined(PETSC_MISSING_SIGQUIT)
242     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
243 #endif
244 #if !defined(PETSC_MISSING_SIGSEGV)
245     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
246 #endif
247 #if !defined(PETSC_MISSING_SIGSYS)
248     signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
249 #endif
250 #if !defined(PETSC_MISSING_SIGTERM)
251   #if !defined(OMPI_MAJOR_VERSION)
252     /* Open MPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
253     signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
254   #endif
255 #endif
256 #if !defined(PETSC_MISSING_SIGTRAP)
257     signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
258 #endif
259 #if !defined(PETSC_MISSING_SIGTSTP)
260     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
261 #endif
262 #if !defined(PETSC_MISSING_SIGURG)
263     signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
264 #endif
265 #if !defined(PETSC_MISSING_SIGUSR1)
266     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
267 #endif
268 #if !defined(PETSC_MISSING_SIGUSR2)
269     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
270 #endif
271     SignalSet = PETSC_TRUE;
272   }
273   if (!routine) {
274 #if !defined(PETSC_MISSING_SIGALRM)
275     /* signal(SIGALRM, SIG_DFL); */
276 #endif
277 #if !defined(PETSC_MISSING_SIGBUS)
278     signal(SIGBUS, SIG_DFL);
279 #endif
280 #if !defined(PETSC_MISSING_SIGCONT)
281     /* signal(SIGCONT, SIG_DFL); */
282 #endif
283 #if !defined(PETSC_MISSING_SIGFPE)
284     signal(SIGFPE, SIG_DFL);
285 #endif
286 #if !defined(PETSC_MISSING_SIGHUP)
287     signal(SIGHUP, SIG_DFL);
288 #endif
289 #if !defined(PETSC_MISSING_SIGILL)
290     signal(SIGILL, SIG_DFL);
291 #endif
292 #if !defined(PETSC_MISSING_SIGINT)
293     /* signal(SIGINT,  SIG_DFL); */
294 #endif
295 #if !defined(PETSC_MISSING_SIGPIPE)
296     signal(SIGPIPE, SIG_DFL);
297 #endif
298 #if !defined(PETSC_MISSING_SIGQUIT)
299     signal(SIGQUIT, SIG_DFL);
300 #endif
301 #if !defined(PETSC_MISSING_SIGSEGV)
302     signal(SIGSEGV, SIG_DFL);
303 #endif
304 #if !defined(PETSC_MISSING_SIGSYS)
305     signal(SIGSYS, SIG_DFL);
306 #endif
307 #if !defined(PETSC_MISSING_SIGTERM)
308     signal(SIGTERM, SIG_DFL);
309 #endif
310 #if !defined(PETSC_MISSING_SIGTRAP)
311     signal(SIGTRAP, SIG_DFL);
312 #endif
313 #if !defined(PETSC_MISSING_SIGTSTP)
314     /* signal(SIGTSTP, SIG_DFL); */
315 #endif
316 #if !defined(PETSC_MISSING_SIGURG)
317     signal(SIGURG, SIG_DFL);
318 #endif
319 #if !defined(PETSC_MISSING_SIGUSR1)
320     /* signal(SIGUSR1, SIG_DFL); */
321 #endif
322 #if !defined(PETSC_MISSING_SIGUSR2)
323     /* signal(SIGUSR2, SIG_DFL); */
324 #endif
325     SignalSet = PETSC_FALSE;
326   }
327   PetscCall(PetscNew(&newsh));
328   if (sh) {
329     PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
330     newsh->previous = sh;
331   } else newsh->previous = NULL;
332   newsh->handler = routine;
333   newsh->ctx     = ctx;
334   newsh->classid = SIGNAL_CLASSID;
335   sh             = newsh;
336   PetscFunctionReturn(PETSC_SUCCESS);
337 }
338 
339 /*@
340   PetscPopSignalHandler - Removes the last signal handler that was pushed.
341   If no signal handlers are left on the stack it will remove the PETSc signal handler.
342   (That is PETSc will no longer catch signals).
343 
344   Not Collective
345 
346   Level: developer
347 
348   Note:
349   There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by
350   the loader. That information is lost with the first call to `PetscPushSignalHandler()`
351 
352 .seealso: [](sec_errors), `PetscPushSignalHandler()`
353 @*/
354 PetscErrorCode PetscPopSignalHandler(void)
355 {
356   struct SH *tmp;
357 
358   PetscFunctionBegin;
359   if (!sh) PetscFunctionReturn(PETSC_SUCCESS);
360   PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
361 
362   tmp = sh;
363   sh  = sh->previous;
364   PetscCall(PetscFree(tmp));
365   if (!sh || !sh->handler) {
366 #if !defined(PETSC_MISSING_SIGALRM)
367     /* signal(SIGALRM, SIG_DFL); */
368 #endif
369 #if !defined(PETSC_MISSING_SIGBUS)
370     signal(SIGBUS, SIG_DFL);
371 #endif
372 #if !defined(PETSC_MISSING_SIGCONT)
373     /* signal(SIGCONT, SIG_DFL); */
374 #endif
375 #if !defined(PETSC_MISSING_SIGFPE)
376     signal(SIGFPE, SIG_DFL);
377 #endif
378 #if !defined(PETSC_MISSING_SIGHUP)
379     signal(SIGHUP, SIG_DFL);
380 #endif
381 #if !defined(PETSC_MISSING_SIGILL)
382     signal(SIGILL, SIG_DFL);
383 #endif
384 #if !defined(PETSC_MISSING_SIGINT)
385     /* signal(SIGINT,  SIG_DFL); */
386 #endif
387 #if !defined(PETSC_MISSING_SIGPIPE)
388     signal(SIGPIPE, SIG_DFL);
389 #endif
390 #if !defined(PETSC_MISSING_SIGQUIT)
391     signal(SIGQUIT, SIG_DFL);
392 #endif
393 #if !defined(PETSC_MISSING_SIGSEGV)
394     signal(SIGSEGV, SIG_DFL);
395 #endif
396 #if !defined(PETSC_MISSING_SIGSYS)
397     signal(SIGSYS, SIG_DFL);
398 #endif
399 #if !defined(PETSC_MISSING_SIGTERM)
400     signal(SIGTERM, SIG_DFL);
401 #endif
402 #if !defined(PETSC_MISSING_SIGTRAP)
403     signal(SIGTRAP, SIG_DFL);
404 #endif
405 #if !defined(PETSC_MISSING_SIGTSTP)
406     /* signal(SIGTSTP, SIG_DFL); */
407 #endif
408 #if !defined(PETSC_MISSING_SIGURG)
409     signal(SIGURG, SIG_DFL);
410 #endif
411 #if !defined(PETSC_MISSING_SIGUSR1)
412     /* signal(SIGUSR1, SIG_DFL); */
413 #endif
414 #if !defined(PETSC_MISSING_SIGUSR2)
415     /* signal(SIGUSR2, SIG_DFL); */
416 #endif
417     SignalSet = PETSC_FALSE;
418   } else {
419     SignalSet = PETSC_TRUE;
420   }
421   PetscFunctionReturn(PETSC_SUCCESS);
422 }
423