/* Source: petsc/src/sys/error/signal.c (revision 750b007cd8d816cecd9de99077bb0a703b4cf61a) */
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h> /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId classid;
14   PetscErrorCode (*handler)(int, void *);
15   void      *ctx;
16   struct SH *previous;
17 };
18 static struct SH *sh        = NULL;
19 static PetscBool  SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void) {
25   _Exit(MPI_ERR_OTHER);
26 }
27 
28 /*
29     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
30              any signal handler set by PETSc or the application code.
31 
32    Input Parameters: (depends on system)
33 .    sig - integer code indicating the type of signal
34 .    code - ??
35 .    sigcontext - ??
36 .    addr - ??
37 
38 */
39 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
40 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
41 #else
42 static void PetscSignalHandler_Private(int sig)
43 #endif
44 {
45   PetscErrorCode ierr;
46 
47   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0);
48   else {
49     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
50     ierr = (*sh->handler)(sig, sh->ctx);
51   }
52   if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
53 }
54 
55 /*@
56    PetscSignalHandlerDefault - Default signal handler.
57 
58    Not Collective
59 
60    Input Parameters:
61 +  sig - signal value
62 -  ptr - unused pointer
63 
64    Developer Note:
65    This does not call `PetscError()`, handles the entire error process directly
66 
67    Level: advanced
68 
69 @*/
70 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr) {
71   const char *SIGNAME[64];
72 
73   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
74   SIGNAME[0] = "Unknown signal";
75 #if !defined(PETSC_MISSING_SIGABRT)
76   SIGNAME[SIGABRT] = "Abort";
77 #endif
78 #if !defined(PETSC_MISSING_SIGALRM)
79   SIGNAME[SIGALRM] = "Alarm";
80 #endif
81 #if !defined(PETSC_MISSING_SIGBUS)
82   SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
83 #endif
84 #if !defined(PETSC_MISSING_SIGCHLD)
85   SIGNAME[SIGCHLD] = "CHLD";
86 #endif
87 #if !defined(PETSC_MISSING_SIGCONT)
88   SIGNAME[SIGCONT] = "CONT";
89 #endif
90 #if !defined(PETSC_MISSING_SIGFPE)
91   SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
92 #endif
93 #if !defined(PETSC_MISSING_SIGHUP)
94   SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
95 #endif
96 #if !defined(PETSC_MISSING_SIGILL)
97   SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
98 #endif
99 #if !defined(PETSC_MISSING_SIGINT)
100   SIGNAME[SIGINT] = "Interrupt";
101 #endif
102 #if !defined(PETSC_MISSING_SIGKILL)
103   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
104 #endif
105 #if !defined(PETSC_MISSING_SIGPIPE)
106   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
107 #endif
108 #if !defined(PETSC_MISSING_SIGQUIT)
109   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
110 #endif
111 #if !defined(PETSC_MISSING_SIGSEGV)
112   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
113 #endif
114 #if !defined(PETSC_MISSING_SIGSYS)
115   SIGNAME[SIGSYS] = "SYS";
116 #endif
117 #if !defined(PETSC_MISSING_SIGTERM)
118   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
119 #endif
120 #if !defined(PETSC_MISSING_SIGTRAP)
121   SIGNAME[SIGTRAP] = "TRAP";
122 #endif
123 #if !defined(PETSC_MISSING_SIGTSTP)
124   SIGNAME[SIGTSTP] = "TSTP";
125 #endif
126 #if !defined(PETSC_MISSING_SIGURG)
127   SIGNAME[SIGURG] = "URG";
128 #endif
129 #if !defined(PETSC_MISSING_SIGUSR1)
130   SIGNAME[SIGUSR1] = "User 1";
131 #endif
132 #if !defined(PETSC_MISSING_SIGUSR2)
133   SIGNAME[SIGUSR2] = "User 2";
134 #endif
135 
136   signal(sig, SIG_DFL);
137   PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
138   (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
139   if (sig >= 0 && sig <= 20) (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
140   else (*PetscErrorPrintf)("Caught signal\n");
141 
142   (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
143   (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
144 #if defined(PETSC_HAVE_CUDA)
145   (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
146 #endif
147 #if PetscDefined(USE_DEBUG)
148   (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
149   PetscStackView(PETSC_STDOUT);
150 #else
151   (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
152   (*PetscErrorPrintf)("to get more information on the crash.\n");
153 #endif
154 #if !defined(PETSC_MISSING_SIGBUS)
155   if (sig == SIGSEGV || sig == SIGBUS) {
156 #else
157   if (sig == SIGSEGV) {
158 #endif
159     PetscBool debug;
160 
161     PetscMallocGetDebug(&debug, NULL, NULL);
162     if (debug) PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
163     else (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
164   }
165   atexit(MyExit);
166   PETSCABORT(PETSC_COMM_WORLD, (int)PETSC_ERR_SIG);
167   return 0;
168 }
169 
/* PETSC_SIGNAL_CAST is written in front of the handler passed to signal(); unless it has already
   been defined it expands to nothing, so the handler is passed unchanged */
#ifndef PETSC_SIGNAL_CAST
#define PETSC_SIGNAL_CAST
#endif
173 
174 /*@C
175    PetscPushSignalHandler - Catches the usual fatal errors and
176    calls a user-provided routine.
177 
178    Not Collective
179 
180    Input Parameters:
181 +  routine - routine to call when a signal is received
182 -  ctx - optional context needed by the routine
183 
184   Level: developer
185 
186 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
187 @*/
188 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx) {
189   struct SH *newsh;
190 
191   PetscFunctionBegin;
192   if (!SIGNAL_CLASSID) {
193     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
194     SIGNAL_CLASSID = 19;
195   }
196   if (!SignalSet && routine) {
197     /* Do not catch ABRT, CHLD, KILL */
198 #if !defined(PETSC_MISSING_SIGALRM)
199     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
200 #endif
201 #if !defined(PETSC_MISSING_SIGBUS)
202     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
203 #endif
204 #if !defined(PETSC_MISSING_SIGCONT)
205     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
206 #endif
207 #if !defined(PETSC_MISSING_SIGFPE)
208     signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
209 #endif
210 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
211     {
212       struct sigaction action;
213       sigaction(SIGHUP, NULL, &action);
214       if (action.sa_handler == SIG_IGN) {
215         PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefor not changing its signal handler\n"));
216       } else {
217         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
218       }
219     }
220 #endif
221 #if !defined(PETSC_MISSING_SIGILL)
222     signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
223 #endif
224 #if !defined(PETSC_MISSING_SIGINT)
225     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
226 #endif
227 #if !defined(PETSC_MISSING_SIGPIPE)
228     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
229 #endif
230 #if !defined(PETSC_MISSING_SIGQUIT)
231     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
232 #endif
233 #if !defined(PETSC_MISSING_SIGSEGV)
234     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
235 #endif
236 #if !defined(PETSC_MISSING_SIGSYS)
237     signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
238 #endif
239 #if !defined(PETSC_MISSING_SIGTERM)
240 #if !defined(OMPI_MAJOR_VERSION)
241     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
242     signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
243 #endif
244 #endif
245 #if !defined(PETSC_MISSING_SIGTRAP)
246     signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
247 #endif
248 #if !defined(PETSC_MISSING_SIGTSTP)
249     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
250 #endif
251 #if !defined(PETSC_MISSING_SIGURG)
252     signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
253 #endif
254 #if !defined(PETSC_MISSING_SIGUSR1)
255     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
256 #endif
257 #if !defined(PETSC_MISSING_SIGUSR2)
258     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
259 #endif
260     SignalSet = PETSC_TRUE;
261   }
262   if (!routine) {
263 #if !defined(PETSC_MISSING_SIGALRM)
264     /* signal(SIGALRM, SIG_DFL); */
265 #endif
266 #if !defined(PETSC_MISSING_SIGBUS)
267     signal(SIGBUS, SIG_DFL);
268 #endif
269 #if !defined(PETSC_MISSING_SIGCONT)
270     /* signal(SIGCONT, SIG_DFL); */
271 #endif
272 #if !defined(PETSC_MISSING_SIGFPE)
273     signal(SIGFPE, SIG_DFL);
274 #endif
275 #if !defined(PETSC_MISSING_SIGHUP)
276     signal(SIGHUP, SIG_DFL);
277 #endif
278 #if !defined(PETSC_MISSING_SIGILL)
279     signal(SIGILL, SIG_DFL);
280 #endif
281 #if !defined(PETSC_MISSING_SIGINT)
282     /* signal(SIGINT,  SIG_DFL); */
283 #endif
284 #if !defined(PETSC_MISSING_SIGPIPE)
285     signal(SIGPIPE, SIG_DFL);
286 #endif
287 #if !defined(PETSC_MISSING_SIGQUIT)
288     signal(SIGQUIT, SIG_DFL);
289 #endif
290 #if !defined(PETSC_MISSING_SIGSEGV)
291     signal(SIGSEGV, SIG_DFL);
292 #endif
293 #if !defined(PETSC_MISSING_SIGSYS)
294     signal(SIGSYS, SIG_DFL);
295 #endif
296 #if !defined(PETSC_MISSING_SIGTERM)
297     signal(SIGTERM, SIG_DFL);
298 #endif
299 #if !defined(PETSC_MISSING_SIGTRAP)
300     signal(SIGTRAP, SIG_DFL);
301 #endif
302 #if !defined(PETSC_MISSING_SIGTSTP)
303     /* signal(SIGTSTP, SIG_DFL); */
304 #endif
305 #if !defined(PETSC_MISSING_SIGURG)
306     signal(SIGURG, SIG_DFL);
307 #endif
308 #if !defined(PETSC_MISSING_SIGUSR1)
309     /* signal(SIGUSR1, SIG_DFL); */
310 #endif
311 #if !defined(PETSC_MISSING_SIGUSR2)
312     /* signal(SIGUSR2, SIG_DFL); */
313 #endif
314     SignalSet = PETSC_FALSE;
315   }
316   PetscCall(PetscNew(&newsh));
317   if (sh) {
318     PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
319     newsh->previous = sh;
320   } else newsh->previous = NULL;
321   newsh->handler = routine;
322   newsh->ctx     = ctx;
323   newsh->classid = SIGNAL_CLASSID;
324   sh             = newsh;
325   PetscFunctionReturn(0);
326 }
327 
328 /*@
329    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
330        If no signal handlers are left on the stack it will remove the PETSc signal handler.
331        (That is PETSc will no longer catch signals).
332 
333    Not Collective
334 
335   Level: developer
336 
337 .seealso: `PetscPushSignalHandler()`
338 @*/
339 PetscErrorCode PetscPopSignalHandler(void) {
340   struct SH *tmp;
341 
342   PetscFunctionBegin;
343   if (!sh) PetscFunctionReturn(0);
344   PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
345 
346   tmp = sh;
347   sh  = sh->previous;
348   PetscCall(PetscFree(tmp));
349   if (!sh || !sh->handler) {
350 #if !defined(PETSC_MISSING_SIGALRM)
351     /* signal(SIGALRM, SIG_DFL); */
352 #endif
353 #if !defined(PETSC_MISSING_SIGBUS)
354     signal(SIGBUS, SIG_DFL);
355 #endif
356 #if !defined(PETSC_MISSING_SIGCONT)
357     /* signal(SIGCONT, SIG_DFL); */
358 #endif
359 #if !defined(PETSC_MISSING_SIGFPE)
360     signal(SIGFPE, SIG_DFL);
361 #endif
362 #if !defined(PETSC_MISSING_SIGHUP)
363     signal(SIGHUP, SIG_DFL);
364 #endif
365 #if !defined(PETSC_MISSING_SIGILL)
366     signal(SIGILL, SIG_DFL);
367 #endif
368 #if !defined(PETSC_MISSING_SIGINT)
369     /* signal(SIGINT,  SIG_DFL); */
370 #endif
371 #if !defined(PETSC_MISSING_SIGPIPE)
372     signal(SIGPIPE, SIG_DFL);
373 #endif
374 #if !defined(PETSC_MISSING_SIGQUIT)
375     signal(SIGQUIT, SIG_DFL);
376 #endif
377 #if !defined(PETSC_MISSING_SIGSEGV)
378     signal(SIGSEGV, SIG_DFL);
379 #endif
380 #if !defined(PETSC_MISSING_SIGSYS)
381     signal(SIGSYS, SIG_DFL);
382 #endif
383 #if !defined(PETSC_MISSING_SIGTERM)
384     signal(SIGTERM, SIG_DFL);
385 #endif
386 #if !defined(PETSC_MISSING_SIGTRAP)
387     signal(SIGTRAP, SIG_DFL);
388 #endif
389 #if !defined(PETSC_MISSING_SIGTSTP)
390     /* signal(SIGTSTP, SIG_DFL); */
391 #endif
392 #if !defined(PETSC_MISSING_SIGURG)
393     signal(SIGURG, SIG_DFL);
394 #endif
395 #if !defined(PETSC_MISSING_SIGUSR1)
396     /* signal(SIGUSR1, SIG_DFL); */
397 #endif
398 #if !defined(PETSC_MISSING_SIGUSR2)
399     /* signal(SIGUSR2, SIG_DFL); */
400 #endif
401     SignalSet = PETSC_FALSE;
402   } else {
403     SignalSet = PETSC_TRUE;
404   }
405   PetscFunctionReturn(0);
406 }
407