xref: /petsc/src/sys/error/signal.c (revision f2c6b1a247e0aba1e6cff92019aae48a2a13617a)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h> /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId classid;
14   PetscErrorCode (*handler)(int, void *);
15   void      *ctx;
16   struct SH *previous;
17 };
18 static struct SH *sh        = NULL;
19 static PetscBool  SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0);
49   else {
50     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
51     ierr = (*sh->handler)(sig, sh->ctx);
52   }
53   if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
54 }
55 
56 /*@
57    PetscSignalHandlerDefault - Default signal handler.
58 
59    Not Collective
60 
61    Input Parameters:
62 +  sig - signal value
63 -  ptr - unused pointer
64 
65    Developer Note:
66    This does not call `PetscError()`, handles the entire error process directly
67 
68    Level: advanced
69 
70 @*/
71 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr)
72 {
73   const char *SIGNAME[64];
74 
75   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
76   SIGNAME[0] = "Unknown signal";
77 #if !defined(PETSC_MISSING_SIGABRT)
78   SIGNAME[SIGABRT] = "Abort";
79 #endif
80 #if !defined(PETSC_MISSING_SIGALRM)
81   SIGNAME[SIGALRM] = "Alarm";
82 #endif
83 #if !defined(PETSC_MISSING_SIGBUS)
84   SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
85 #endif
86 #if !defined(PETSC_MISSING_SIGCHLD)
87   SIGNAME[SIGCHLD] = "CHLD";
88 #endif
89 #if !defined(PETSC_MISSING_SIGCONT)
90   SIGNAME[SIGCONT] = "CONT";
91 #endif
92 #if !defined(PETSC_MISSING_SIGFPE)
93   SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
94 #endif
95 #if !defined(PETSC_MISSING_SIGHUP)
96   SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
97 #endif
98 #if !defined(PETSC_MISSING_SIGILL)
99   SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
100 #endif
101 #if !defined(PETSC_MISSING_SIGINT)
102   SIGNAME[SIGINT] = "Interrupt";
103 #endif
104 #if !defined(PETSC_MISSING_SIGKILL)
105   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
106 #endif
107 #if !defined(PETSC_MISSING_SIGPIPE)
108   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
109 #endif
110 #if !defined(PETSC_MISSING_SIGQUIT)
111   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
112 #endif
113 #if !defined(PETSC_MISSING_SIGSEGV)
114   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
115 #endif
116 #if !defined(PETSC_MISSING_SIGSYS)
117   SIGNAME[SIGSYS] = "SYS";
118 #endif
119 #if !defined(PETSC_MISSING_SIGTERM)
120   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
121 #endif
122 #if !defined(PETSC_MISSING_SIGTRAP)
123   SIGNAME[SIGTRAP] = "TRAP";
124 #endif
125 #if !defined(PETSC_MISSING_SIGTSTP)
126   SIGNAME[SIGTSTP] = "TSTP";
127 #endif
128 #if !defined(PETSC_MISSING_SIGURG)
129   SIGNAME[SIGURG] = "URG";
130 #endif
131 #if !defined(PETSC_MISSING_SIGUSR1)
132   SIGNAME[SIGUSR1] = "User 1";
133 #endif
134 #if !defined(PETSC_MISSING_SIGUSR2)
135   SIGNAME[SIGUSR2] = "User 2";
136 #endif
137 
138   signal(sig, SIG_DFL);
139   PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
140   (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
141   if (sig >= 0 && sig <= 20) (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
142   else (*PetscErrorPrintf)("Caught signal\n");
143 
144   (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
145   (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
146 #if defined(PETSC_HAVE_CUDA)
147   (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
148 #endif
149 #if PetscDefined(USE_DEBUG)
150   #if !PetscDefined(HAVE_THREADSAFETY)
151   (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
152   PetscStackView(PETSC_STDOUT);
153   #endif
154 #else
155   (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
156   (*PetscErrorPrintf)("to get more information on the crash.\n");
157 #endif
158 #if !defined(PETSC_MISSING_SIGBUS)
159   if (sig == SIGSEGV || sig == SIGBUS) {
160 #else
161   if (sig == SIGSEGV) {
162 #endif
163     PetscBool debug;
164 
165     PetscMallocGetDebug(&debug, NULL, NULL);
166     if (debug) PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
167     else (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
168   }
169   atexit(MyExit);
170   PETSCABORT(PETSC_COMM_WORLD, (int)PETSC_ERR_SIG);
171   return 0;
172 }
173 
174 #if !defined(PETSC_SIGNAL_CAST)
175   #define PETSC_SIGNAL_CAST
176 #endif
177 
178 /*@C
179    PetscPushSignalHandler - Catches the usual fatal errors and
180    calls a user-provided routine.
181 
182    Not Collective
183 
184    Input Parameters:
185 +  routine - routine to call when a signal is received
186 -  ctx - optional context needed by the routine
187 
188   Level: developer
189 
190 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
191 @*/
192 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx)
193 {
194   struct SH *newsh;
195 
196   PetscFunctionBegin;
197   if (!SIGNAL_CLASSID) {
198     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
199     SIGNAL_CLASSID = 19;
200   }
201   if (!SignalSet && routine) {
202     /* Do not catch ABRT, CHLD, KILL */
203 #if !defined(PETSC_MISSING_SIGALRM)
204     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
205 #endif
206 #if !defined(PETSC_MISSING_SIGBUS)
207     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
208 #endif
209 #if !defined(PETSC_MISSING_SIGCONT)
210     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
211 #endif
212 #if !defined(PETSC_MISSING_SIGFPE)
213     signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
214 #endif
215 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
216     {
217       struct sigaction action;
218       sigaction(SIGHUP, NULL, &action);
219       if (action.sa_handler == SIG_IGN) {
220         PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefor not changing its signal handler\n"));
221       } else {
222         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
223       }
224     }
225 #endif
226 #if !defined(PETSC_MISSING_SIGILL)
227     signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
228 #endif
229 #if !defined(PETSC_MISSING_SIGINT)
230     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
231 #endif
232 #if !defined(PETSC_MISSING_SIGPIPE)
233     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
234 #endif
235 #if !defined(PETSC_MISSING_SIGQUIT)
236     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
237 #endif
238 #if !defined(PETSC_MISSING_SIGSEGV)
239     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
240 #endif
241 #if !defined(PETSC_MISSING_SIGSYS)
242     signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
243 #endif
244 #if !defined(PETSC_MISSING_SIGTERM)
245   #if !defined(OMPI_MAJOR_VERSION)
246     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
247     signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
248   #endif
249 #endif
250 #if !defined(PETSC_MISSING_SIGTRAP)
251     signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
252 #endif
253 #if !defined(PETSC_MISSING_SIGTSTP)
254     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
255 #endif
256 #if !defined(PETSC_MISSING_SIGURG)
257     signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
258 #endif
259 #if !defined(PETSC_MISSING_SIGUSR1)
260     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
261 #endif
262 #if !defined(PETSC_MISSING_SIGUSR2)
263     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
264 #endif
265     SignalSet = PETSC_TRUE;
266   }
267   if (!routine) {
268 #if !defined(PETSC_MISSING_SIGALRM)
269     /* signal(SIGALRM, SIG_DFL); */
270 #endif
271 #if !defined(PETSC_MISSING_SIGBUS)
272     signal(SIGBUS, SIG_DFL);
273 #endif
274 #if !defined(PETSC_MISSING_SIGCONT)
275     /* signal(SIGCONT, SIG_DFL); */
276 #endif
277 #if !defined(PETSC_MISSING_SIGFPE)
278     signal(SIGFPE, SIG_DFL);
279 #endif
280 #if !defined(PETSC_MISSING_SIGHUP)
281     signal(SIGHUP, SIG_DFL);
282 #endif
283 #if !defined(PETSC_MISSING_SIGILL)
284     signal(SIGILL, SIG_DFL);
285 #endif
286 #if !defined(PETSC_MISSING_SIGINT)
287     /* signal(SIGINT,  SIG_DFL); */
288 #endif
289 #if !defined(PETSC_MISSING_SIGPIPE)
290     signal(SIGPIPE, SIG_DFL);
291 #endif
292 #if !defined(PETSC_MISSING_SIGQUIT)
293     signal(SIGQUIT, SIG_DFL);
294 #endif
295 #if !defined(PETSC_MISSING_SIGSEGV)
296     signal(SIGSEGV, SIG_DFL);
297 #endif
298 #if !defined(PETSC_MISSING_SIGSYS)
299     signal(SIGSYS, SIG_DFL);
300 #endif
301 #if !defined(PETSC_MISSING_SIGTERM)
302     signal(SIGTERM, SIG_DFL);
303 #endif
304 #if !defined(PETSC_MISSING_SIGTRAP)
305     signal(SIGTRAP, SIG_DFL);
306 #endif
307 #if !defined(PETSC_MISSING_SIGTSTP)
308     /* signal(SIGTSTP, SIG_DFL); */
309 #endif
310 #if !defined(PETSC_MISSING_SIGURG)
311     signal(SIGURG, SIG_DFL);
312 #endif
313 #if !defined(PETSC_MISSING_SIGUSR1)
314     /* signal(SIGUSR1, SIG_DFL); */
315 #endif
316 #if !defined(PETSC_MISSING_SIGUSR2)
317     /* signal(SIGUSR2, SIG_DFL); */
318 #endif
319     SignalSet = PETSC_FALSE;
320   }
321   PetscCall(PetscNew(&newsh));
322   if (sh) {
323     PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
324     newsh->previous = sh;
325   } else newsh->previous = NULL;
326   newsh->handler = routine;
327   newsh->ctx     = ctx;
328   newsh->classid = SIGNAL_CLASSID;
329   sh             = newsh;
330   PetscFunctionReturn(0);
331 }
332 
333 /*@
334    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
335        If no signal handlers are left on the stack it will remove the PETSc signal handler.
336        (That is PETSc will no longer catch signals).
337 
338    Not Collective
339 
340   Level: developer
341 
342 .seealso: `PetscPushSignalHandler()`
343 @*/
344 PetscErrorCode PetscPopSignalHandler(void)
345 {
346   struct SH *tmp;
347 
348   PetscFunctionBegin;
349   if (!sh) PetscFunctionReturn(0);
350   PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
351 
352   tmp = sh;
353   sh  = sh->previous;
354   PetscCall(PetscFree(tmp));
355   if (!sh || !sh->handler) {
356 #if !defined(PETSC_MISSING_SIGALRM)
357     /* signal(SIGALRM, SIG_DFL); */
358 #endif
359 #if !defined(PETSC_MISSING_SIGBUS)
360     signal(SIGBUS, SIG_DFL);
361 #endif
362 #if !defined(PETSC_MISSING_SIGCONT)
363     /* signal(SIGCONT, SIG_DFL); */
364 #endif
365 #if !defined(PETSC_MISSING_SIGFPE)
366     signal(SIGFPE, SIG_DFL);
367 #endif
368 #if !defined(PETSC_MISSING_SIGHUP)
369     signal(SIGHUP, SIG_DFL);
370 #endif
371 #if !defined(PETSC_MISSING_SIGILL)
372     signal(SIGILL, SIG_DFL);
373 #endif
374 #if !defined(PETSC_MISSING_SIGINT)
375     /* signal(SIGINT,  SIG_DFL); */
376 #endif
377 #if !defined(PETSC_MISSING_SIGPIPE)
378     signal(SIGPIPE, SIG_DFL);
379 #endif
380 #if !defined(PETSC_MISSING_SIGQUIT)
381     signal(SIGQUIT, SIG_DFL);
382 #endif
383 #if !defined(PETSC_MISSING_SIGSEGV)
384     signal(SIGSEGV, SIG_DFL);
385 #endif
386 #if !defined(PETSC_MISSING_SIGSYS)
387     signal(SIGSYS, SIG_DFL);
388 #endif
389 #if !defined(PETSC_MISSING_SIGTERM)
390     signal(SIGTERM, SIG_DFL);
391 #endif
392 #if !defined(PETSC_MISSING_SIGTRAP)
393     signal(SIGTRAP, SIG_DFL);
394 #endif
395 #if !defined(PETSC_MISSING_SIGTSTP)
396     /* signal(SIGTSTP, SIG_DFL); */
397 #endif
398 #if !defined(PETSC_MISSING_SIGURG)
399     signal(SIGURG, SIG_DFL);
400 #endif
401 #if !defined(PETSC_MISSING_SIGUSR1)
402     /* signal(SIGUSR1, SIG_DFL); */
403 #endif
404 #if !defined(PETSC_MISSING_SIGUSR2)
405     /* signal(SIGUSR2, SIG_DFL); */
406 #endif
407     SignalSet = PETSC_FALSE;
408   } else {
409     SignalSet = PETSC_TRUE;
410   }
411   PetscFunctionReturn(0);
412 }
413