xref: /petsc/src/sys/error/signal.c (revision 9371c9d470a9602b6d10a8bf50c9b2280a79e45a)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h> /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
/* Tag stored in every handler-stack entry; set to 19 by the first PetscPushSignalHandler() call
   and compared against each entry's classid to detect a corrupted stack */
static PetscClassId SIGNAL_CLASSID = 0;

/* One entry of the stack of signal handlers maintained by PetscPushSignalHandler() and
   PetscPopSignalHandler() */
struct SH {
  PetscClassId classid;                    /* expected to equal SIGNAL_CLASSID */
  PetscErrorCode (*handler)(int, void *);  /* routine invoked with the signal number and ctx; NULL means "do not catch" */
  void      *ctx;                          /* passed unchanged as the second argument of handler */
  struct SH *previous;                     /* entry that was on top before this one was pushed, or NULL */
};
static struct SH *sh        = NULL;        /* top of the handler stack, NULL when the stack is empty */
static PetscBool  SignalSet = PETSC_FALSE; /* PETSC_TRUE while PetscSignalHandler_Private is installed via signal() */
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void) {
25   _Exit(MPI_ERR_OTHER);
26 }
27 
28 /*
29     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
30              any signal handler set by PETSc or the application code.
31 
32    Input Parameters: (depends on system)
33 .    sig - integer code indicating the type of signal
34 .    code - ??
35 .    sigcontext - ??
36 .    addr - ??
37 
38 */
39 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
40 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
41 #else
42 static void PetscSignalHandler_Private(int sig)
43 #endif
44 {
45   PetscErrorCode ierr;
46 
47   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0);
48   else {
49     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
50     ierr = (*sh->handler)(sig, sh->ctx);
51   }
52   if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
53 }
54 
55 /*@
56    PetscSignalHandlerDefault - Default signal handler.
57 
58    Not Collective
59 
60    Input Parameters:
61 +  sig - signal value
62 -  ptr - unused pointer
63 
64    Developer Note:
65    This does not call PetscError(), handles the entire error process directly
66 
67    Level: advanced
68 
69 @*/
70 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr) {
71   const char *SIGNAME[64];
72 
73   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
74   SIGNAME[0] = "Unknown signal";
75 #if !defined(PETSC_MISSING_SIGABRT)
76   SIGNAME[SIGABRT] = "Abort";
77 #endif
78 #if !defined(PETSC_MISSING_SIGALRM)
79   SIGNAME[SIGALRM] = "Alarm";
80 #endif
81 #if !defined(PETSC_MISSING_SIGBUS)
82   SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
83 #endif
84 #if !defined(PETSC_MISSING_SIGCHLD)
85   SIGNAME[SIGCHLD] = "CHLD";
86 #endif
87 #if !defined(PETSC_MISSING_SIGCONT)
88   SIGNAME[SIGCONT] = "CONT";
89 #endif
90 #if !defined(PETSC_MISSING_SIGFPE)
91   SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
92 #endif
93 #if !defined(PETSC_MISSING_SIGHUP)
94   SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
95 #endif
96 #if !defined(PETSC_MISSING_SIGILL)
97   SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
98 #endif
99 #if !defined(PETSC_MISSING_SIGINT)
100   SIGNAME[SIGINT] = "Interrupt";
101 #endif
102 #if !defined(PETSC_MISSING_SIGKILL)
103   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
104 #endif
105 #if !defined(PETSC_MISSING_SIGPIPE)
106   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
107 #endif
108 #if !defined(PETSC_MISSING_SIGQUIT)
109   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
110 #endif
111 #if !defined(PETSC_MISSING_SIGSEGV)
112   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
113 #endif
114 #if !defined(PETSC_MISSING_SIGSYS)
115   SIGNAME[SIGSYS] = "SYS";
116 #endif
117 #if !defined(PETSC_MISSING_SIGTERM)
118   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
119 #endif
120 #if !defined(PETSC_MISSING_SIGTRAP)
121   SIGNAME[SIGTRAP] = "TRAP";
122 #endif
123 #if !defined(PETSC_MISSING_SIGTSTP)
124   SIGNAME[SIGTSTP] = "TSTP";
125 #endif
126 #if !defined(PETSC_MISSING_SIGURG)
127   SIGNAME[SIGURG] = "URG";
128 #endif
129 #if !defined(PETSC_MISSING_SIGUSR1)
130   SIGNAME[SIGUSR1] = "User 1";
131 #endif
132 #if !defined(PETSC_MISSING_SIGUSR2)
133   SIGNAME[SIGUSR2] = "User 2";
134 #endif
135 
136   signal(sig, SIG_DFL);
137   PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
138   (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
139   if (sig >= 0 && sig <= 20) (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
140   else (*PetscErrorPrintf)("Caught signal\n");
141 
142   (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
143   (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
144 #if defined(PETSC_HAVE_CUDA)
145   (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
146 #endif
147 #if PetscDefined(USE_DEBUG)
148   (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
149   PetscStackView(PETSC_STDOUT);
150 #else
151   (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
152   (*PetscErrorPrintf)("to get more information on the crash.\n");
153 #endif
154 #if !defined(PETSC_MISSING_SIGBUS)
155   if (sig == SIGSEGV || sig == SIGBUS) {
156 #else
157   if (sig == SIGSEGV) {
158 #endif
159     PetscBool debug;
160 
161     PetscMallocGetDebug(&debug, NULL, NULL);
162     if (debug) PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
163     else (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
164   }
165   atexit(MyExit);
166   PETSCABORT(PETSC_COMM_WORLD, (int)PETSC_ERR_SIG);
167   return 0;
168 }
169 
/* PETSC_SIGNAL_CAST is placed in front of the handler in every signal() call in this file.
   When nothing has defined it earlier it expands to nothing, i.e. no cast is applied.
   NOTE(review): presumably the build configuration defines it on systems whose signal()
   prototype needs an explicit cast - confirm. */
#if !defined(PETSC_SIGNAL_CAST)
#define PETSC_SIGNAL_CAST
#endif
173 
174 /*@C
175    PetscPushSignalHandler - Catches the usual fatal errors and
176    calls a user-provided routine.
177 
178    Not Collective
179 
180    Input Parameters:
181 +  routine - routine to call when a signal is received
182 -  ctx - optional context needed by the routine
183 
184   Level: developer
185 
186 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
187 
188 @*/
189 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx) {
190   struct SH *newsh;
191 
192   PetscFunctionBegin;
193   if (!SIGNAL_CLASSID) {
194     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
195     SIGNAL_CLASSID = 19;
196   }
197   if (!SignalSet && routine) {
198     /* Do not catch ABRT, CHLD, KILL */
199 #if !defined(PETSC_MISSING_SIGALRM)
200     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
201 #endif
202 #if !defined(PETSC_MISSING_SIGBUS)
203     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
204 #endif
205 #if !defined(PETSC_MISSING_SIGCONT)
206     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
207 #endif
208 #if !defined(PETSC_MISSING_SIGFPE)
209     signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
210 #endif
211 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
212     {
213       struct sigaction action;
214       sigaction(SIGHUP, NULL, &action);
215       if (action.sa_handler == SIG_IGN) {
216         PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefor not changing its signal handler\n"));
217       } else {
218         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
219       }
220     }
221 #endif
222 #if !defined(PETSC_MISSING_SIGILL)
223     signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
224 #endif
225 #if !defined(PETSC_MISSING_SIGINT)
226     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
227 #endif
228 #if !defined(PETSC_MISSING_SIGPIPE)
229     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
230 #endif
231 #if !defined(PETSC_MISSING_SIGQUIT)
232     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
233 #endif
234 #if !defined(PETSC_MISSING_SIGSEGV)
235     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
236 #endif
237 #if !defined(PETSC_MISSING_SIGSYS)
238     signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
239 #endif
240 #if !defined(PETSC_MISSING_SIGTERM)
241 #if !defined(OMPI_MAJOR_VERSION)
242     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
243     signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
244 #endif
245 #endif
246 #if !defined(PETSC_MISSING_SIGTRAP)
247     signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
248 #endif
249 #if !defined(PETSC_MISSING_SIGTSTP)
250     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
251 #endif
252 #if !defined(PETSC_MISSING_SIGURG)
253     signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
254 #endif
255 #if !defined(PETSC_MISSING_SIGUSR1)
256     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
257 #endif
258 #if !defined(PETSC_MISSING_SIGUSR2)
259     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
260 #endif
261     SignalSet = PETSC_TRUE;
262   }
263   if (!routine) {
264 #if !defined(PETSC_MISSING_SIGALRM)
265     /* signal(SIGALRM, SIG_DFL); */
266 #endif
267 #if !defined(PETSC_MISSING_SIGBUS)
268     signal(SIGBUS, SIG_DFL);
269 #endif
270 #if !defined(PETSC_MISSING_SIGCONT)
271     /* signal(SIGCONT, SIG_DFL); */
272 #endif
273 #if !defined(PETSC_MISSING_SIGFPE)
274     signal(SIGFPE, SIG_DFL);
275 #endif
276 #if !defined(PETSC_MISSING_SIGHUP)
277     signal(SIGHUP, SIG_DFL);
278 #endif
279 #if !defined(PETSC_MISSING_SIGILL)
280     signal(SIGILL, SIG_DFL);
281 #endif
282 #if !defined(PETSC_MISSING_SIGINT)
283     /* signal(SIGINT,  SIG_DFL); */
284 #endif
285 #if !defined(PETSC_MISSING_SIGPIPE)
286     signal(SIGPIPE, SIG_DFL);
287 #endif
288 #if !defined(PETSC_MISSING_SIGQUIT)
289     signal(SIGQUIT, SIG_DFL);
290 #endif
291 #if !defined(PETSC_MISSING_SIGSEGV)
292     signal(SIGSEGV, SIG_DFL);
293 #endif
294 #if !defined(PETSC_MISSING_SIGSYS)
295     signal(SIGSYS, SIG_DFL);
296 #endif
297 #if !defined(PETSC_MISSING_SIGTERM)
298     signal(SIGTERM, SIG_DFL);
299 #endif
300 #if !defined(PETSC_MISSING_SIGTRAP)
301     signal(SIGTRAP, SIG_DFL);
302 #endif
303 #if !defined(PETSC_MISSING_SIGTSTP)
304     /* signal(SIGTSTP, SIG_DFL); */
305 #endif
306 #if !defined(PETSC_MISSING_SIGURG)
307     signal(SIGURG, SIG_DFL);
308 #endif
309 #if !defined(PETSC_MISSING_SIGUSR1)
310     /* signal(SIGUSR1, SIG_DFL); */
311 #endif
312 #if !defined(PETSC_MISSING_SIGUSR2)
313     /* signal(SIGUSR2, SIG_DFL); */
314 #endif
315     SignalSet = PETSC_FALSE;
316   }
317   PetscCall(PetscNew(&newsh));
318   if (sh) {
319     PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
320     newsh->previous = sh;
321   } else newsh->previous = NULL;
322   newsh->handler = routine;
323   newsh->ctx     = ctx;
324   newsh->classid = SIGNAL_CLASSID;
325   sh             = newsh;
326   PetscFunctionReturn(0);
327 }
328 
329 /*@
330    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
331        If no signal handlers are left on the stack it will remove the PETSc signal handler.
332        (That is PETSc will no longer catch signals).
333 
334    Not Collective
335 
336   Level: developer
337 
338 .seealso: `PetscPushSignalHandler()`
339 
340 @*/
341 PetscErrorCode PetscPopSignalHandler(void) {
342   struct SH *tmp;
343 
344   PetscFunctionBegin;
345   if (!sh) PetscFunctionReturn(0);
346   PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
347 
348   tmp = sh;
349   sh  = sh->previous;
350   PetscCall(PetscFree(tmp));
351   if (!sh || !sh->handler) {
352 #if !defined(PETSC_MISSING_SIGALRM)
353     /* signal(SIGALRM, SIG_DFL); */
354 #endif
355 #if !defined(PETSC_MISSING_SIGBUS)
356     signal(SIGBUS, SIG_DFL);
357 #endif
358 #if !defined(PETSC_MISSING_SIGCONT)
359     /* signal(SIGCONT, SIG_DFL); */
360 #endif
361 #if !defined(PETSC_MISSING_SIGFPE)
362     signal(SIGFPE, SIG_DFL);
363 #endif
364 #if !defined(PETSC_MISSING_SIGHUP)
365     signal(SIGHUP, SIG_DFL);
366 #endif
367 #if !defined(PETSC_MISSING_SIGILL)
368     signal(SIGILL, SIG_DFL);
369 #endif
370 #if !defined(PETSC_MISSING_SIGINT)
371     /* signal(SIGINT,  SIG_DFL); */
372 #endif
373 #if !defined(PETSC_MISSING_SIGPIPE)
374     signal(SIGPIPE, SIG_DFL);
375 #endif
376 #if !defined(PETSC_MISSING_SIGQUIT)
377     signal(SIGQUIT, SIG_DFL);
378 #endif
379 #if !defined(PETSC_MISSING_SIGSEGV)
380     signal(SIGSEGV, SIG_DFL);
381 #endif
382 #if !defined(PETSC_MISSING_SIGSYS)
383     signal(SIGSYS, SIG_DFL);
384 #endif
385 #if !defined(PETSC_MISSING_SIGTERM)
386     signal(SIGTERM, SIG_DFL);
387 #endif
388 #if !defined(PETSC_MISSING_SIGTRAP)
389     signal(SIGTRAP, SIG_DFL);
390 #endif
391 #if !defined(PETSC_MISSING_SIGTSTP)
392     /* signal(SIGTSTP, SIG_DFL); */
393 #endif
394 #if !defined(PETSC_MISSING_SIGURG)
395     signal(SIGURG, SIG_DFL);
396 #endif
397 #if !defined(PETSC_MISSING_SIGUSR1)
398     /* signal(SIGUSR1, SIG_DFL); */
399 #endif
400 #if !defined(PETSC_MISSING_SIGUSR2)
401     /* signal(SIGUSR2, SIG_DFL); */
402 #endif
403     SignalSet = PETSC_FALSE;
404   } else {
405     SignalSet = PETSC_TRUE;
406   }
407   PetscFunctionReturn(0);
408 }
409