xref: /petsc/src/sys/error/signal.c (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h> /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId classid;
14   PetscErrorCode (*handler)(int, void *);
15   void      *ctx;
16   struct SH *previous;
17 };
18 static struct SH *sh        = NULL;
19 static PetscBool  SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0);
49   else {
50     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
51     ierr = (*sh->handler)(sig, sh->ctx);
52   }
53   if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
54 }
55 
56 /*@
57    PetscSignalHandlerDefault - Default signal handler.
58 
59    Not Collective
60 
61    Input Parameters:
62 +  sig - signal value
63 -  ptr - unused pointer
64 
65    Developer Note:
66    This does not call `PetscError()`, handles the entire error process directly
67 
68    Level: advanced
69 
70 @*/
71 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr)
72 {
73   PetscErrorCode ierr;
74   const char    *SIGNAME[64];
75 
76   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
77   SIGNAME[0] = "Unknown signal";
78 #if !defined(PETSC_MISSING_SIGABRT)
79   SIGNAME[SIGABRT] = "Abort";
80 #endif
81 #if !defined(PETSC_MISSING_SIGALRM)
82   SIGNAME[SIGALRM] = "Alarm";
83 #endif
84 #if !defined(PETSC_MISSING_SIGBUS)
85   SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
86 #endif
87 #if !defined(PETSC_MISSING_SIGCHLD)
88   SIGNAME[SIGCHLD] = "CHLD";
89 #endif
90 #if !defined(PETSC_MISSING_SIGCONT)
91   SIGNAME[SIGCONT] = "CONT";
92 #endif
93 #if !defined(PETSC_MISSING_SIGFPE)
94   SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
95 #endif
96 #if !defined(PETSC_MISSING_SIGHUP)
97   SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
98 #endif
99 #if !defined(PETSC_MISSING_SIGILL)
100   SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
101 #endif
102 #if !defined(PETSC_MISSING_SIGINT)
103   SIGNAME[SIGINT] = "Interrupt";
104 #endif
105 #if !defined(PETSC_MISSING_SIGKILL)
106   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
107 #endif
108 #if !defined(PETSC_MISSING_SIGPIPE)
109   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
110 #endif
111 #if !defined(PETSC_MISSING_SIGQUIT)
112   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
113 #endif
114 #if !defined(PETSC_MISSING_SIGSEGV)
115   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
116 #endif
117 #if !defined(PETSC_MISSING_SIGSYS)
118   SIGNAME[SIGSYS] = "SYS";
119 #endif
120 #if !defined(PETSC_MISSING_SIGTERM)
121   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
122 #endif
123 #if !defined(PETSC_MISSING_SIGTRAP)
124   SIGNAME[SIGTRAP] = "TRAP";
125 #endif
126 #if !defined(PETSC_MISSING_SIGTSTP)
127   SIGNAME[SIGTSTP] = "TSTP";
128 #endif
129 #if !defined(PETSC_MISSING_SIGURG)
130   SIGNAME[SIGURG] = "URG";
131 #endif
132 #if !defined(PETSC_MISSING_SIGUSR1)
133   SIGNAME[SIGUSR1] = "User 1";
134 #endif
135 #if !defined(PETSC_MISSING_SIGUSR2)
136   SIGNAME[SIGUSR2] = "User 2";
137 #endif
138 
139   signal(sig, SIG_DFL);
140   ierr = PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
141   ierr = (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
142   if (sig >= 0 && sig <= 20) ierr = (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
143   else ierr = (*PetscErrorPrintf)("Caught signal\n");
144 
145   ierr = (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
146   ierr = (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
147 #if defined(PETSC_HAVE_CUDA)
148   ierr = (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
149 #endif
150 #if PetscDefined(USE_DEBUG)
151   #if !PetscDefined(HAVE_THREADSAFETY)
152   ierr = (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
153   ierr = PetscStackView(PETSC_STDOUT);
154   #endif
155 #else
156   ierr = (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
157   ierr = (*PetscErrorPrintf)("to get more information on the crash.\n");
158 #endif
159 #if !defined(PETSC_MISSING_SIGBUS)
160   if (sig == SIGSEGV || sig == SIGBUS) {
161 #else
162   if (sig == SIGSEGV) {
163 #endif
164     PetscBool debug;
165 
166     ierr = PetscMallocGetDebug(&debug, NULL, NULL);
167     if (debug) ierr = PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
168     else ierr = (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
169   }
170   atexit(MyExit);
171   (void)ierr;
172   PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_SIG);
173   return PETSC_SUCCESS;
174 }
175 
/* PETSC_SIGNAL_CAST is written in front of PetscSignalHandler_Private in the signal() calls of this file;
   it expands to nothing unless a definition was supplied before this point */
#ifndef PETSC_SIGNAL_CAST
  #define PETSC_SIGNAL_CAST
#endif
179 
180 /*@C
181    PetscPushSignalHandler - Catches the usual fatal errors and
182    calls a user-provided routine.
183 
184    Not Collective
185 
186    Input Parameters:
187 +  routine - routine to call when a signal is received
188 -  ctx - optional context needed by the routine
189 
190   Level: developer
191 
192 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
193 @*/
194 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx)
195 {
196   struct SH *newsh;
197 
198   PetscFunctionBegin;
199   if (!SIGNAL_CLASSID) {
200     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
201     SIGNAL_CLASSID = 19;
202   }
203   if (!SignalSet && routine) {
204     /* Do not catch ABRT, CHLD, KILL */
205 #if !defined(PETSC_MISSING_SIGALRM)
206     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
207 #endif
208 #if !defined(PETSC_MISSING_SIGBUS)
209     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
210 #endif
211 #if !defined(PETSC_MISSING_SIGCONT)
212     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
213 #endif
214 #if !defined(PETSC_MISSING_SIGFPE)
215     signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
216 #endif
217 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
218     {
219       struct sigaction action;
220       sigaction(SIGHUP, NULL, &action);
221       if (action.sa_handler == SIG_IGN) {
222         PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefor not changing its signal handler\n"));
223       } else {
224         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
225       }
226     }
227 #endif
228 #if !defined(PETSC_MISSING_SIGILL)
229     signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
230 #endif
231 #if !defined(PETSC_MISSING_SIGINT)
232     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
233 #endif
234 #if !defined(PETSC_MISSING_SIGPIPE)
235     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
236 #endif
237 #if !defined(PETSC_MISSING_SIGQUIT)
238     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
239 #endif
240 #if !defined(PETSC_MISSING_SIGSEGV)
241     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
242 #endif
243 #if !defined(PETSC_MISSING_SIGSYS)
244     signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
245 #endif
246 #if !defined(PETSC_MISSING_SIGTERM)
247   #if !defined(OMPI_MAJOR_VERSION)
248     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
249     signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
250   #endif
251 #endif
252 #if !defined(PETSC_MISSING_SIGTRAP)
253     signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
254 #endif
255 #if !defined(PETSC_MISSING_SIGTSTP)
256     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
257 #endif
258 #if !defined(PETSC_MISSING_SIGURG)
259     signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
260 #endif
261 #if !defined(PETSC_MISSING_SIGUSR1)
262     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
263 #endif
264 #if !defined(PETSC_MISSING_SIGUSR2)
265     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
266 #endif
267     SignalSet = PETSC_TRUE;
268   }
269   if (!routine) {
270 #if !defined(PETSC_MISSING_SIGALRM)
271     /* signal(SIGALRM, SIG_DFL); */
272 #endif
273 #if !defined(PETSC_MISSING_SIGBUS)
274     signal(SIGBUS, SIG_DFL);
275 #endif
276 #if !defined(PETSC_MISSING_SIGCONT)
277     /* signal(SIGCONT, SIG_DFL); */
278 #endif
279 #if !defined(PETSC_MISSING_SIGFPE)
280     signal(SIGFPE, SIG_DFL);
281 #endif
282 #if !defined(PETSC_MISSING_SIGHUP)
283     signal(SIGHUP, SIG_DFL);
284 #endif
285 #if !defined(PETSC_MISSING_SIGILL)
286     signal(SIGILL, SIG_DFL);
287 #endif
288 #if !defined(PETSC_MISSING_SIGINT)
289     /* signal(SIGINT,  SIG_DFL); */
290 #endif
291 #if !defined(PETSC_MISSING_SIGPIPE)
292     signal(SIGPIPE, SIG_DFL);
293 #endif
294 #if !defined(PETSC_MISSING_SIGQUIT)
295     signal(SIGQUIT, SIG_DFL);
296 #endif
297 #if !defined(PETSC_MISSING_SIGSEGV)
298     signal(SIGSEGV, SIG_DFL);
299 #endif
300 #if !defined(PETSC_MISSING_SIGSYS)
301     signal(SIGSYS, SIG_DFL);
302 #endif
303 #if !defined(PETSC_MISSING_SIGTERM)
304     signal(SIGTERM, SIG_DFL);
305 #endif
306 #if !defined(PETSC_MISSING_SIGTRAP)
307     signal(SIGTRAP, SIG_DFL);
308 #endif
309 #if !defined(PETSC_MISSING_SIGTSTP)
310     /* signal(SIGTSTP, SIG_DFL); */
311 #endif
312 #if !defined(PETSC_MISSING_SIGURG)
313     signal(SIGURG, SIG_DFL);
314 #endif
315 #if !defined(PETSC_MISSING_SIGUSR1)
316     /* signal(SIGUSR1, SIG_DFL); */
317 #endif
318 #if !defined(PETSC_MISSING_SIGUSR2)
319     /* signal(SIGUSR2, SIG_DFL); */
320 #endif
321     SignalSet = PETSC_FALSE;
322   }
323   PetscCall(PetscNew(&newsh));
324   if (sh) {
325     PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
326     newsh->previous = sh;
327   } else newsh->previous = NULL;
328   newsh->handler = routine;
329   newsh->ctx     = ctx;
330   newsh->classid = SIGNAL_CLASSID;
331   sh             = newsh;
332   PetscFunctionReturn(PETSC_SUCCESS);
333 }
334 
335 /*@
336    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
337        If no signal handlers are left on the stack it will remove the PETSc signal handler.
338        (That is PETSc will no longer catch signals).
339 
340    Not Collective
341 
342   Level: developer
343 
344 .seealso: `PetscPushSignalHandler()`
345 @*/
346 PetscErrorCode PetscPopSignalHandler(void)
347 {
348   struct SH *tmp;
349 
350   PetscFunctionBegin;
351   if (!sh) PetscFunctionReturn(PETSC_SUCCESS);
352   PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
353 
354   tmp = sh;
355   sh  = sh->previous;
356   PetscCall(PetscFree(tmp));
357   if (!sh || !sh->handler) {
358 #if !defined(PETSC_MISSING_SIGALRM)
359     /* signal(SIGALRM, SIG_DFL); */
360 #endif
361 #if !defined(PETSC_MISSING_SIGBUS)
362     signal(SIGBUS, SIG_DFL);
363 #endif
364 #if !defined(PETSC_MISSING_SIGCONT)
365     /* signal(SIGCONT, SIG_DFL); */
366 #endif
367 #if !defined(PETSC_MISSING_SIGFPE)
368     signal(SIGFPE, SIG_DFL);
369 #endif
370 #if !defined(PETSC_MISSING_SIGHUP)
371     signal(SIGHUP, SIG_DFL);
372 #endif
373 #if !defined(PETSC_MISSING_SIGILL)
374     signal(SIGILL, SIG_DFL);
375 #endif
376 #if !defined(PETSC_MISSING_SIGINT)
377     /* signal(SIGINT,  SIG_DFL); */
378 #endif
379 #if !defined(PETSC_MISSING_SIGPIPE)
380     signal(SIGPIPE, SIG_DFL);
381 #endif
382 #if !defined(PETSC_MISSING_SIGQUIT)
383     signal(SIGQUIT, SIG_DFL);
384 #endif
385 #if !defined(PETSC_MISSING_SIGSEGV)
386     signal(SIGSEGV, SIG_DFL);
387 #endif
388 #if !defined(PETSC_MISSING_SIGSYS)
389     signal(SIGSYS, SIG_DFL);
390 #endif
391 #if !defined(PETSC_MISSING_SIGTERM)
392     signal(SIGTERM, SIG_DFL);
393 #endif
394 #if !defined(PETSC_MISSING_SIGTRAP)
395     signal(SIGTRAP, SIG_DFL);
396 #endif
397 #if !defined(PETSC_MISSING_SIGTSTP)
398     /* signal(SIGTSTP, SIG_DFL); */
399 #endif
400 #if !defined(PETSC_MISSING_SIGURG)
401     signal(SIGURG, SIG_DFL);
402 #endif
403 #if !defined(PETSC_MISSING_SIGUSR1)
404     /* signal(SIGUSR1, SIG_DFL); */
405 #endif
406 #if !defined(PETSC_MISSING_SIGUSR2)
407     /* signal(SIGUSR2, SIG_DFL); */
408 #endif
409     SignalSet = PETSC_FALSE;
410   } else {
411     SignalSet = PETSC_TRUE;
412   }
413   PetscFunctionReturn(PETSC_SUCCESS);
414 }
415