xref: /petsc/src/sys/error/signal.c (revision 0619917b5a674bb687c64e7daba2ab22be99af31)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h> /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId classid;
14   PetscErrorCode (*handler)(int, void *);
15   void      *ctx;
16   struct SH *previous;
17 };
18 static struct SH *sh        = NULL;
19 static PetscBool  SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0);
49   else {
50     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
51     ierr = (*sh->handler)(sig, sh->ctx);
52   }
53   if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
54 }
55 
56 /*@
57   PetscSignalHandlerDefault - Default signal handler.
58 
59   Not Collective
60 
61   Input Parameters:
62 + sig - signal value
63 - ptr - unused pointer
64 
65   Level: advanced
66 
67   Developer Notes:
68   This does not call `PetscError()`, handles the entire error process directly
69 
70 .seealso: `PetscPushSignalHandler()`
71 @*/
72 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr)
73 {
74   PetscErrorCode ierr;
75   const char    *SIGNAME[64];
76 
77   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
78   SIGNAME[0] = "Unknown signal";
79 #if !defined(PETSC_MISSING_SIGABRT)
80   SIGNAME[SIGABRT] = "Abort";
81 #endif
82 #if !defined(PETSC_MISSING_SIGALRM)
83   SIGNAME[SIGALRM] = "Alarm";
84 #endif
85 #if !defined(PETSC_MISSING_SIGBUS)
86   SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
87 #endif
88 #if !defined(PETSC_MISSING_SIGCHLD)
89   SIGNAME[SIGCHLD] = "CHLD";
90 #endif
91 #if !defined(PETSC_MISSING_SIGCONT)
92   SIGNAME[SIGCONT] = "CONT";
93 #endif
94 #if !defined(PETSC_MISSING_SIGFPE)
95   SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
96 #endif
97 #if !defined(PETSC_MISSING_SIGHUP)
98   SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
99 #endif
100 #if !defined(PETSC_MISSING_SIGILL)
101   SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
102 #endif
103 #if !defined(PETSC_MISSING_SIGINT)
104   SIGNAME[SIGINT] = "Interrupt";
105 #endif
106 #if !defined(PETSC_MISSING_SIGKILL)
107   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
108 #endif
109 #if !defined(PETSC_MISSING_SIGPIPE)
110   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
111 #endif
112 #if !defined(PETSC_MISSING_SIGQUIT)
113   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
114 #endif
115 #if !defined(PETSC_MISSING_SIGSEGV)
116   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
117 #endif
118 #if !defined(PETSC_MISSING_SIGSYS)
119   SIGNAME[SIGSYS] = "SYS";
120 #endif
121 #if !defined(PETSC_MISSING_SIGTERM)
122   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
123 #endif
124 #if !defined(PETSC_MISSING_SIGTRAP)
125   SIGNAME[SIGTRAP] = "TRAP";
126 #endif
127 #if !defined(PETSC_MISSING_SIGTSTP)
128   SIGNAME[SIGTSTP] = "TSTP";
129 #endif
130 #if !defined(PETSC_MISSING_SIGURG)
131   SIGNAME[SIGURG] = "URG";
132 #endif
133 #if !defined(PETSC_MISSING_SIGUSR1)
134   SIGNAME[SIGUSR1] = "User 1";
135 #endif
136 #if !defined(PETSC_MISSING_SIGUSR2)
137   SIGNAME[SIGUSR2] = "User 2";
138 #endif
139 
140   signal(sig, SIG_DFL);
141   ierr = PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
142   ierr = (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
143   if (sig >= 0 && sig <= 20) ierr = (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
144   else ierr = (*PetscErrorPrintf)("Caught signal\n");
145 
146   ierr = (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
147   ierr = (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
148 #if defined(PETSC_HAVE_CUDA)
149   ierr = (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
150 #endif
151 #if PetscDefined(USE_DEBUG)
152   #if !PetscDefined(HAVE_THREADSAFETY)
153   ierr = (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
154   ierr = PetscStackView(PETSC_STDOUT);
155   #endif
156 #else
157   ierr = (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
158   ierr = (*PetscErrorPrintf)("to get more information on the crash.\n");
159 #endif
160 #if !defined(PETSC_MISSING_SIGBUS)
161   if (sig == SIGSEGV || sig == SIGBUS) {
162 #else
163   if (sig == SIGSEGV) {
164 #endif
165     PetscBool debug;
166 
167     ierr = PetscMallocGetDebug(&debug, NULL, NULL);
168     if (debug) ierr = PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
169     else ierr = (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
170   }
171   atexit(MyExit);
172   (void)ierr;
173   PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_SIG);
174   return PETSC_SUCCESS;
175 }
176 
177 #if !defined(PETSC_SIGNAL_CAST)
178   #define PETSC_SIGNAL_CAST
179 #endif
180 
181 /*@C
182   PetscPushSignalHandler - Catches the usual fatal errors and
183   calls a user-provided routine.
184 
185   Not Collective
186 
187   Input Parameters:
188 + routine - routine to call when a signal is received
189 - ctx     - optional context needed by the routine
190 
191   Level: developer
192 
193 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
194 @*/
195 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx)
196 {
197   struct SH *newsh;
198 
199   PetscFunctionBegin;
200   if (!SIGNAL_CLASSID) {
201     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
202     SIGNAL_CLASSID = 19;
203   }
204   if (!SignalSet && routine) {
205     /* Do not catch ABRT, CHLD, KILL */
206 #if !defined(PETSC_MISSING_SIGALRM)
207     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
208 #endif
209 #if !defined(PETSC_MISSING_SIGBUS)
210     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
211 #endif
212 #if !defined(PETSC_MISSING_SIGCONT)
213     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
214 #endif
215 #if !defined(PETSC_MISSING_SIGFPE)
216     signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
217 #endif
218 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
219     {
220       struct sigaction action;
221       sigaction(SIGHUP, NULL, &action);
222       if (action.sa_handler == SIG_IGN) {
223         PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefore not changing its signal handler\n"));
224       } else {
225         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
226       }
227     }
228 #endif
229 #if !defined(PETSC_MISSING_SIGILL)
230     signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
231 #endif
232 #if !defined(PETSC_MISSING_SIGINT)
233     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
234 #endif
235 #if !defined(PETSC_MISSING_SIGPIPE)
236     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
237 #endif
238 #if !defined(PETSC_MISSING_SIGQUIT)
239     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
240 #endif
241 #if !defined(PETSC_MISSING_SIGSEGV)
242     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
243 #endif
244 #if !defined(PETSC_MISSING_SIGSYS)
245     signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
246 #endif
247 #if !defined(PETSC_MISSING_SIGTERM)
248   #if !defined(OMPI_MAJOR_VERSION)
249     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
250     signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
251   #endif
252 #endif
253 #if !defined(PETSC_MISSING_SIGTRAP)
254     signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
255 #endif
256 #if !defined(PETSC_MISSING_SIGTSTP)
257     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
258 #endif
259 #if !defined(PETSC_MISSING_SIGURG)
260     signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
261 #endif
262 #if !defined(PETSC_MISSING_SIGUSR1)
263     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
264 #endif
265 #if !defined(PETSC_MISSING_SIGUSR2)
266     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
267 #endif
268     SignalSet = PETSC_TRUE;
269   }
270   if (!routine) {
271 #if !defined(PETSC_MISSING_SIGALRM)
272     /* signal(SIGALRM, SIG_DFL); */
273 #endif
274 #if !defined(PETSC_MISSING_SIGBUS)
275     signal(SIGBUS, SIG_DFL);
276 #endif
277 #if !defined(PETSC_MISSING_SIGCONT)
278     /* signal(SIGCONT, SIG_DFL); */
279 #endif
280 #if !defined(PETSC_MISSING_SIGFPE)
281     signal(SIGFPE, SIG_DFL);
282 #endif
283 #if !defined(PETSC_MISSING_SIGHUP)
284     signal(SIGHUP, SIG_DFL);
285 #endif
286 #if !defined(PETSC_MISSING_SIGILL)
287     signal(SIGILL, SIG_DFL);
288 #endif
289 #if !defined(PETSC_MISSING_SIGINT)
290     /* signal(SIGINT,  SIG_DFL); */
291 #endif
292 #if !defined(PETSC_MISSING_SIGPIPE)
293     signal(SIGPIPE, SIG_DFL);
294 #endif
295 #if !defined(PETSC_MISSING_SIGQUIT)
296     signal(SIGQUIT, SIG_DFL);
297 #endif
298 #if !defined(PETSC_MISSING_SIGSEGV)
299     signal(SIGSEGV, SIG_DFL);
300 #endif
301 #if !defined(PETSC_MISSING_SIGSYS)
302     signal(SIGSYS, SIG_DFL);
303 #endif
304 #if !defined(PETSC_MISSING_SIGTERM)
305     signal(SIGTERM, SIG_DFL);
306 #endif
307 #if !defined(PETSC_MISSING_SIGTRAP)
308     signal(SIGTRAP, SIG_DFL);
309 #endif
310 #if !defined(PETSC_MISSING_SIGTSTP)
311     /* signal(SIGTSTP, SIG_DFL); */
312 #endif
313 #if !defined(PETSC_MISSING_SIGURG)
314     signal(SIGURG, SIG_DFL);
315 #endif
316 #if !defined(PETSC_MISSING_SIGUSR1)
317     /* signal(SIGUSR1, SIG_DFL); */
318 #endif
319 #if !defined(PETSC_MISSING_SIGUSR2)
320     /* signal(SIGUSR2, SIG_DFL); */
321 #endif
322     SignalSet = PETSC_FALSE;
323   }
324   PetscCall(PetscNew(&newsh));
325   if (sh) {
326     PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
327     newsh->previous = sh;
328   } else newsh->previous = NULL;
329   newsh->handler = routine;
330   newsh->ctx     = ctx;
331   newsh->classid = SIGNAL_CLASSID;
332   sh             = newsh;
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 /*@
337   PetscPopSignalHandler - Removes the most last signal handler that was pushed.
338   If no signal handlers are left on the stack it will remove the PETSc signal handler.
339   (That is PETSc will no longer catch signals).
340 
341   Not Collective
342 
343   Level: developer
344 
345 .seealso: `PetscPushSignalHandler()`
346 @*/
347 PetscErrorCode PetscPopSignalHandler(void)
348 {
349   struct SH *tmp;
350 
351   PetscFunctionBegin;
352   if (!sh) PetscFunctionReturn(PETSC_SUCCESS);
353   PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
354 
355   tmp = sh;
356   sh  = sh->previous;
357   PetscCall(PetscFree(tmp));
358   if (!sh || !sh->handler) {
359 #if !defined(PETSC_MISSING_SIGALRM)
360     /* signal(SIGALRM, SIG_DFL); */
361 #endif
362 #if !defined(PETSC_MISSING_SIGBUS)
363     signal(SIGBUS, SIG_DFL);
364 #endif
365 #if !defined(PETSC_MISSING_SIGCONT)
366     /* signal(SIGCONT, SIG_DFL); */
367 #endif
368 #if !defined(PETSC_MISSING_SIGFPE)
369     signal(SIGFPE, SIG_DFL);
370 #endif
371 #if !defined(PETSC_MISSING_SIGHUP)
372     signal(SIGHUP, SIG_DFL);
373 #endif
374 #if !defined(PETSC_MISSING_SIGILL)
375     signal(SIGILL, SIG_DFL);
376 #endif
377 #if !defined(PETSC_MISSING_SIGINT)
378     /* signal(SIGINT,  SIG_DFL); */
379 #endif
380 #if !defined(PETSC_MISSING_SIGPIPE)
381     signal(SIGPIPE, SIG_DFL);
382 #endif
383 #if !defined(PETSC_MISSING_SIGQUIT)
384     signal(SIGQUIT, SIG_DFL);
385 #endif
386 #if !defined(PETSC_MISSING_SIGSEGV)
387     signal(SIGSEGV, SIG_DFL);
388 #endif
389 #if !defined(PETSC_MISSING_SIGSYS)
390     signal(SIGSYS, SIG_DFL);
391 #endif
392 #if !defined(PETSC_MISSING_SIGTERM)
393     signal(SIGTERM, SIG_DFL);
394 #endif
395 #if !defined(PETSC_MISSING_SIGTRAP)
396     signal(SIGTRAP, SIG_DFL);
397 #endif
398 #if !defined(PETSC_MISSING_SIGTSTP)
399     /* signal(SIGTSTP, SIG_DFL); */
400 #endif
401 #if !defined(PETSC_MISSING_SIGURG)
402     signal(SIGURG, SIG_DFL);
403 #endif
404 #if !defined(PETSC_MISSING_SIGUSR1)
405     /* signal(SIGUSR1, SIG_DFL); */
406 #endif
407 #if !defined(PETSC_MISSING_SIGUSR2)
408     /* signal(SIGUSR2, SIG_DFL); */
409 #endif
410     SignalSet = PETSC_FALSE;
411   } else {
412     SignalSet = PETSC_TRUE;
413   }
414   PetscFunctionReturn(PETSC_SUCCESS);
415 }
416