xref: /petsc/src/sys/error/signal.c (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h> /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId classid;
14   PetscErrorCode (*handler)(int, void *);
15   void      *ctx;
16   struct SH *previous;
17 };
18 static struct SH *sh        = NULL;
19 static PetscBool  SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0);
49   else {
50     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
51     ierr = (*sh->handler)(sig, sh->ctx);
52   }
53   if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
54 }
55 
56 /*@
57    PetscSignalHandlerDefault - Default signal handler.
58 
59    Not Collective
60 
61    Input Parameters:
62 +  sig - signal value
63 -  ptr - unused pointer
64 
65    Developer Note:
66    This does not call `PetscError()`, handles the entire error process directly
67 
68    Level: advanced
69 
70 @*/
71 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr)
72 {
73   const char *SIGNAME[64];
74 
75   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
76   SIGNAME[0] = "Unknown signal";
77 #if !defined(PETSC_MISSING_SIGABRT)
78   SIGNAME[SIGABRT] = "Abort";
79 #endif
80 #if !defined(PETSC_MISSING_SIGALRM)
81   SIGNAME[SIGALRM] = "Alarm";
82 #endif
83 #if !defined(PETSC_MISSING_SIGBUS)
84   SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
85 #endif
86 #if !defined(PETSC_MISSING_SIGCHLD)
87   SIGNAME[SIGCHLD] = "CHLD";
88 #endif
89 #if !defined(PETSC_MISSING_SIGCONT)
90   SIGNAME[SIGCONT] = "CONT";
91 #endif
92 #if !defined(PETSC_MISSING_SIGFPE)
93   SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
94 #endif
95 #if !defined(PETSC_MISSING_SIGHUP)
96   SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
97 #endif
98 #if !defined(PETSC_MISSING_SIGILL)
99   SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
100 #endif
101 #if !defined(PETSC_MISSING_SIGINT)
102   SIGNAME[SIGINT] = "Interrupt";
103 #endif
104 #if !defined(PETSC_MISSING_SIGKILL)
105   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
106 #endif
107 #if !defined(PETSC_MISSING_SIGPIPE)
108   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
109 #endif
110 #if !defined(PETSC_MISSING_SIGQUIT)
111   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
112 #endif
113 #if !defined(PETSC_MISSING_SIGSEGV)
114   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
115 #endif
116 #if !defined(PETSC_MISSING_SIGSYS)
117   SIGNAME[SIGSYS] = "SYS";
118 #endif
119 #if !defined(PETSC_MISSING_SIGTERM)
120   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
121 #endif
122 #if !defined(PETSC_MISSING_SIGTRAP)
123   SIGNAME[SIGTRAP] = "TRAP";
124 #endif
125 #if !defined(PETSC_MISSING_SIGTSTP)
126   SIGNAME[SIGTSTP] = "TSTP";
127 #endif
128 #if !defined(PETSC_MISSING_SIGURG)
129   SIGNAME[SIGURG] = "URG";
130 #endif
131 #if !defined(PETSC_MISSING_SIGUSR1)
132   SIGNAME[SIGUSR1] = "User 1";
133 #endif
134 #if !defined(PETSC_MISSING_SIGUSR2)
135   SIGNAME[SIGUSR2] = "User 2";
136 #endif
137 
138   signal(sig, SIG_DFL);
139   PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
140   (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
141   if (sig >= 0 && sig <= 20) (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
142   else (*PetscErrorPrintf)("Caught signal\n");
143 
144   (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
145   (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
146 #if defined(PETSC_HAVE_CUDA)
147   (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
148 #endif
149 #if PetscDefined(USE_DEBUG)
150   (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
151   PetscStackView(PETSC_STDOUT);
152 #else
153   (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
154   (*PetscErrorPrintf)("to get more information on the crash.\n");
155 #endif
156 #if !defined(PETSC_MISSING_SIGBUS)
157   if (sig == SIGSEGV || sig == SIGBUS) {
158 #else
159   if (sig == SIGSEGV) {
160 #endif
161     PetscBool debug;
162 
163     PetscMallocGetDebug(&debug, NULL, NULL);
164     if (debug) PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
165     else (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
166   }
167   atexit(MyExit);
168   PETSCABORT(PETSC_COMM_WORLD, (int)PETSC_ERR_SIG);
169   return 0;
170 }
171 
172 #if !defined(PETSC_SIGNAL_CAST)
173   #define PETSC_SIGNAL_CAST
174 #endif
175 
176 /*@C
177    PetscPushSignalHandler - Catches the usual fatal errors and
178    calls a user-provided routine.
179 
180    Not Collective
181 
182    Input Parameters:
183 +  routine - routine to call when a signal is received
184 -  ctx - optional context needed by the routine
185 
186   Level: developer
187 
188 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
189 @*/
190 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx)
191 {
192   struct SH *newsh;
193 
194   PetscFunctionBegin;
195   if (!SIGNAL_CLASSID) {
196     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
197     SIGNAL_CLASSID = 19;
198   }
199   if (!SignalSet && routine) {
200     /* Do not catch ABRT, CHLD, KILL */
201 #if !defined(PETSC_MISSING_SIGALRM)
202     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
203 #endif
204 #if !defined(PETSC_MISSING_SIGBUS)
205     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
206 #endif
207 #if !defined(PETSC_MISSING_SIGCONT)
208     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
209 #endif
210 #if !defined(PETSC_MISSING_SIGFPE)
211     signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
212 #endif
213 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
214     {
215       struct sigaction action;
216       sigaction(SIGHUP, NULL, &action);
217       if (action.sa_handler == SIG_IGN) {
218         PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefor not changing its signal handler\n"));
219       } else {
220         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
221       }
222     }
223 #endif
224 #if !defined(PETSC_MISSING_SIGILL)
225     signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
226 #endif
227 #if !defined(PETSC_MISSING_SIGINT)
228     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
229 #endif
230 #if !defined(PETSC_MISSING_SIGPIPE)
231     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
232 #endif
233 #if !defined(PETSC_MISSING_SIGQUIT)
234     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
235 #endif
236 #if !defined(PETSC_MISSING_SIGSEGV)
237     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
238 #endif
239 #if !defined(PETSC_MISSING_SIGSYS)
240     signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
241 #endif
242 #if !defined(PETSC_MISSING_SIGTERM)
243   #if !defined(OMPI_MAJOR_VERSION)
244     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
245     signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
246   #endif
247 #endif
248 #if !defined(PETSC_MISSING_SIGTRAP)
249     signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
250 #endif
251 #if !defined(PETSC_MISSING_SIGTSTP)
252     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
253 #endif
254 #if !defined(PETSC_MISSING_SIGURG)
255     signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
256 #endif
257 #if !defined(PETSC_MISSING_SIGUSR1)
258     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
259 #endif
260 #if !defined(PETSC_MISSING_SIGUSR2)
261     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
262 #endif
263     SignalSet = PETSC_TRUE;
264   }
265   if (!routine) {
266 #if !defined(PETSC_MISSING_SIGALRM)
267     /* signal(SIGALRM, SIG_DFL); */
268 #endif
269 #if !defined(PETSC_MISSING_SIGBUS)
270     signal(SIGBUS, SIG_DFL);
271 #endif
272 #if !defined(PETSC_MISSING_SIGCONT)
273     /* signal(SIGCONT, SIG_DFL); */
274 #endif
275 #if !defined(PETSC_MISSING_SIGFPE)
276     signal(SIGFPE, SIG_DFL);
277 #endif
278 #if !defined(PETSC_MISSING_SIGHUP)
279     signal(SIGHUP, SIG_DFL);
280 #endif
281 #if !defined(PETSC_MISSING_SIGILL)
282     signal(SIGILL, SIG_DFL);
283 #endif
284 #if !defined(PETSC_MISSING_SIGINT)
285     /* signal(SIGINT,  SIG_DFL); */
286 #endif
287 #if !defined(PETSC_MISSING_SIGPIPE)
288     signal(SIGPIPE, SIG_DFL);
289 #endif
290 #if !defined(PETSC_MISSING_SIGQUIT)
291     signal(SIGQUIT, SIG_DFL);
292 #endif
293 #if !defined(PETSC_MISSING_SIGSEGV)
294     signal(SIGSEGV, SIG_DFL);
295 #endif
296 #if !defined(PETSC_MISSING_SIGSYS)
297     signal(SIGSYS, SIG_DFL);
298 #endif
299 #if !defined(PETSC_MISSING_SIGTERM)
300     signal(SIGTERM, SIG_DFL);
301 #endif
302 #if !defined(PETSC_MISSING_SIGTRAP)
303     signal(SIGTRAP, SIG_DFL);
304 #endif
305 #if !defined(PETSC_MISSING_SIGTSTP)
306     /* signal(SIGTSTP, SIG_DFL); */
307 #endif
308 #if !defined(PETSC_MISSING_SIGURG)
309     signal(SIGURG, SIG_DFL);
310 #endif
311 #if !defined(PETSC_MISSING_SIGUSR1)
312     /* signal(SIGUSR1, SIG_DFL); */
313 #endif
314 #if !defined(PETSC_MISSING_SIGUSR2)
315     /* signal(SIGUSR2, SIG_DFL); */
316 #endif
317     SignalSet = PETSC_FALSE;
318   }
319   PetscCall(PetscNew(&newsh));
320   if (sh) {
321     PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
322     newsh->previous = sh;
323   } else newsh->previous = NULL;
324   newsh->handler = routine;
325   newsh->ctx     = ctx;
326   newsh->classid = SIGNAL_CLASSID;
327   sh             = newsh;
328   PetscFunctionReturn(0);
329 }
330 
331 /*@
332    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
333        If no signal handlers are left on the stack it will remove the PETSc signal handler.
334        (That is PETSc will no longer catch signals).
335 
336    Not Collective
337 
338   Level: developer
339 
340 .seealso: `PetscPushSignalHandler()`
341 @*/
342 PetscErrorCode PetscPopSignalHandler(void)
343 {
344   struct SH *tmp;
345 
346   PetscFunctionBegin;
347   if (!sh) PetscFunctionReturn(0);
348   PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
349 
350   tmp = sh;
351   sh  = sh->previous;
352   PetscCall(PetscFree(tmp));
353   if (!sh || !sh->handler) {
354 #if !defined(PETSC_MISSING_SIGALRM)
355     /* signal(SIGALRM, SIG_DFL); */
356 #endif
357 #if !defined(PETSC_MISSING_SIGBUS)
358     signal(SIGBUS, SIG_DFL);
359 #endif
360 #if !defined(PETSC_MISSING_SIGCONT)
361     /* signal(SIGCONT, SIG_DFL); */
362 #endif
363 #if !defined(PETSC_MISSING_SIGFPE)
364     signal(SIGFPE, SIG_DFL);
365 #endif
366 #if !defined(PETSC_MISSING_SIGHUP)
367     signal(SIGHUP, SIG_DFL);
368 #endif
369 #if !defined(PETSC_MISSING_SIGILL)
370     signal(SIGILL, SIG_DFL);
371 #endif
372 #if !defined(PETSC_MISSING_SIGINT)
373     /* signal(SIGINT,  SIG_DFL); */
374 #endif
375 #if !defined(PETSC_MISSING_SIGPIPE)
376     signal(SIGPIPE, SIG_DFL);
377 #endif
378 #if !defined(PETSC_MISSING_SIGQUIT)
379     signal(SIGQUIT, SIG_DFL);
380 #endif
381 #if !defined(PETSC_MISSING_SIGSEGV)
382     signal(SIGSEGV, SIG_DFL);
383 #endif
384 #if !defined(PETSC_MISSING_SIGSYS)
385     signal(SIGSYS, SIG_DFL);
386 #endif
387 #if !defined(PETSC_MISSING_SIGTERM)
388     signal(SIGTERM, SIG_DFL);
389 #endif
390 #if !defined(PETSC_MISSING_SIGTRAP)
391     signal(SIGTRAP, SIG_DFL);
392 #endif
393 #if !defined(PETSC_MISSING_SIGTSTP)
394     /* signal(SIGTSTP, SIG_DFL); */
395 #endif
396 #if !defined(PETSC_MISSING_SIGURG)
397     signal(SIGURG, SIG_DFL);
398 #endif
399 #if !defined(PETSC_MISSING_SIGUSR1)
400     /* signal(SIGUSR1, SIG_DFL); */
401 #endif
402 #if !defined(PETSC_MISSING_SIGUSR2)
403     /* signal(SIGUSR2, SIG_DFL); */
404 #endif
405     SignalSet = PETSC_FALSE;
406   } else {
407     SignalSet = PETSC_TRUE;
408   }
409   PetscFunctionReturn(0);
410 }
411