xref: /petsc/src/sys/error/signal.c (revision b94d7ded0a05f1bbd5e48daa6f92b28259c75b44)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h>             /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
/* Tag stored in every struct SH; stays 0 until the first PetscPushSignalHandler() call assigns it */
static PetscClassId SIGNAL_CLASSID = 0;

/*
   SH - one entry of the stack of signal handlers maintained by
   PetscPushSignalHandler() and PetscPopSignalHandler().
*/
struct SH {
  PetscClassId   classid;                /* compared against SIGNAL_CLASSID to detect a corrupted entry */
  PetscErrorCode (*handler)(int,void*);  /* routine called with the signal number and ctx; may be NULL */
  void           *ctx;                   /* passed unchanged as the second argument of handler */
  struct SH      *previous;              /* entry that was on top before this one was pushed, or NULL */
};
/* Top of the handler stack; NULL when nothing has been pushed */
static struct SH *sh       = NULL;
/* Set to PETSC_TRUE when PetscPushSignalHandler() installs the PETSc handler and to PETSC_FALSE
   whenever the dispositions are reset to SIG_DFL; PetscPopSignalHandler() also updates it */
static PetscBool SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig,int code,struct sigcontext * scp,char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   PetscFunctionBegin;
49   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig,(void*)0);
50   else {
51     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_COR,"Signal object has been corrupted");
52     ierr = (*sh->handler)(sig,sh->ctx);
53   }
54   if (ierr) PETSCABORT(PETSC_COMM_WORLD,PETSC_ERR_COR);
55 }
56 
57 /*@
58    PetscSignalHandlerDefault - Default signal handler.
59 
60    Not Collective
61 
62    Level: advanced
63 
64    Input Parameters:
65 +  sig - signal value
66 -  ptr - unused pointer
67 
68 @*/
69 PetscErrorCode  PetscSignalHandlerDefault(int sig,void *ptr)
70 {
71   PetscErrorCode ierr;
72   const char     *SIGNAME[64];
73 
74   PetscFunctionBegin;
75   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
76   SIGNAME[0]       = "Unknown signal";
77 #if !defined(PETSC_MISSING_SIGABRT)
78   SIGNAME[SIGABRT] = "Abort";
79 #endif
80 #if !defined(PETSC_MISSING_SIGALRM)
81   SIGNAME[SIGALRM] = "Alarm";
82 #endif
83 #if !defined(PETSC_MISSING_SIGBUS)
84   SIGNAME[SIGBUS]  = "BUS: Bus Error, possibly illegal memory access";
85 #endif
86 #if !defined(PETSC_MISSING_SIGCHLD)
87   SIGNAME[SIGCHLD] = "CHLD";
88 #endif
89 #if !defined(PETSC_MISSING_SIGCONT)
90   SIGNAME[SIGCONT] = "CONT";
91 #endif
92 #if !defined(PETSC_MISSING_SIGFPE)
93   SIGNAME[SIGFPE]  = "FPE: Floating Point Exception,probably divide by zero";
94 #endif
95 #if !defined(PETSC_MISSING_SIGHUP)
96   SIGNAME[SIGHUP]  = "Hang up: Some other process (or the batch system) has told this process to end";
97 #endif
98 #if !defined(PETSC_MISSING_SIGILL)
99   SIGNAME[SIGILL]  = "Illegal instruction: Likely due to memory corruption";
100 #endif
101 #if !defined(PETSC_MISSING_SIGINT)
102   SIGNAME[SIGINT]  = "Interrupt";
103 #endif
104 #if !defined(PETSC_MISSING_SIGKILL)
105   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
106 #endif
107 #if !defined(PETSC_MISSING_SIGPIPE)
108   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
109 #endif
110 #if !defined(PETSC_MISSING_SIGQUIT)
111   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
112 #endif
113 #if !defined(PETSC_MISSING_SIGSEGV)
114   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
115 #endif
116 #if !defined(PETSC_MISSING_SIGSYS)
117   SIGNAME[SIGSYS]  = "SYS";
118 #endif
119 #if !defined(PETSC_MISSING_SIGTERM)
120   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
121 #endif
122 #if !defined(PETSC_MISSING_SIGTRAP)
123   SIGNAME[SIGTRAP] = "TRAP";
124 #endif
125 #if !defined(PETSC_MISSING_SIGTSTP)
126   SIGNAME[SIGTSTP] = "TSTP";
127 #endif
128 #if !defined(PETSC_MISSING_SIGURG)
129   SIGNAME[SIGURG]  = "URG";
130 #endif
131 #if !defined(PETSC_MISSING_SIGUSR1)
132   SIGNAME[SIGUSR1] = "User 1";
133 #endif
134 #if !defined(PETSC_MISSING_SIGUSR2)
135   SIGNAME[SIGUSR2] = "User 2";
136 #endif
137 
138   signal(sig,SIG_DFL);
139   (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
140   if (sig >= 0 && sig <= 20) (*PetscErrorPrintf)("Caught signal number %d %s\n",sig,SIGNAME[sig]);
141   else (*PetscErrorPrintf)("Caught signal\n");
142 
143   (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
144   (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind\n");
145   (*PetscErrorPrintf)("or try http://valgrind.org on GNU/linux and Apple MacOS to find memory corruption errors\n");
146 #if defined(PETSC_HAVE_CUDA)
147   (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
148 #endif
149 #if PetscDefined(USE_DEBUG)
150   PetscStackPop;  /* remove stack frames for error handlers */
151   PetscStackPop;
152   (*PetscErrorPrintf)("likely location of problem given in stack below\n");
153   (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
154   PetscStackView(PETSC_STDOUT);
155 #else
156   (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
157   (*PetscErrorPrintf)("to get more information on the crash.\n");
158 #endif
159   ierr =  PetscError(PETSC_COMM_SELF,0,NULL,NULL,PETSC_ERR_SIG,PETSC_ERROR_INITIAL,NULL);
160 #if !defined(PETSC_MISSING_SIGBUS)
161   if (sig == SIGSEGV || sig == SIGBUS) {
162 #else
163   if (sig == SIGSEGV) {
164 #endif
165     PetscBool debug;
166 
167     PetscMallocGetDebug(&debug,NULL,NULL);
168     if (debug) {
169       (*PetscErrorPrintf)("Checking the memory for corruption.\n");
170       PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
171     } else {
172       (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
173     }
174   }
175   atexit(MyExit);
176   PETSCABORT(PETSC_COMM_WORLD,(int)ierr);
177   PetscFunctionReturn(0);
178 }
179 
/*
   PETSC_SIGNAL_CAST - tokens written in front of PetscSignalHandler_Private wherever it is
   passed to signal(). Expands to nothing here unless it was already defined before this
   point (presumably by the build configuration for platforms whose signal() prototype
   needs an explicit cast - TODO confirm).
*/
#if !defined(PETSC_SIGNAL_CAST)
#define PETSC_SIGNAL_CAST
#endif
183 
184 /*@C
185    PetscPushSignalHandler - Catches the usual fatal errors and
186    calls a user-provided routine.
187 
188    Not Collective
189 
190    Input Parameters:
191 +  routine - routine to call when a signal is received
192 -  ctx - optional context needed by the routine
193 
194   Level: developer
195 
196 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
197 
198 @*/
199 PetscErrorCode  PetscPushSignalHandler(PetscErrorCode (*routine)(int,void*),void *ctx)
200 {
201   struct  SH     *newsh;
202 
203   PetscFunctionBegin;
204   if (!SIGNAL_CLASSID) {
205     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
206     SIGNAL_CLASSID = 19;
207   }
208   if (!SignalSet && routine) {
209     /* Do not catch ABRT, CHLD, KILL */
210 #if !defined(PETSC_MISSING_SIGALRM)
211     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
212 #endif
213 #if !defined(PETSC_MISSING_SIGBUS)
214     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
215 #endif
216 #if !defined(PETSC_MISSING_SIGCONT)
217     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
218 #endif
219 #if !defined(PETSC_MISSING_SIGFPE)
220     signal(SIGFPE,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
221 #endif
222 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
223     {
224       struct  sigaction action;
225       sigaction(SIGHUP,NULL,&action);
226       if (action.sa_handler == SIG_IGN) {
227         PetscCall(PetscInfo(NULL,"SIGHUP previously set to ignore, therefor not changing its signal handler\n"));
228       } else {
229         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
230       }
231     }
232 #endif
233 #if !defined(PETSC_MISSING_SIGILL)
234     signal(SIGILL,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
235 #endif
236 #if !defined(PETSC_MISSING_SIGINT)
237     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
238 #endif
239 #if !defined(PETSC_MISSING_SIGPIPE)
240     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
241 #endif
242 #if !defined(PETSC_MISSING_SIGQUIT)
243     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
244 #endif
245 #if !defined(PETSC_MISSING_SIGSEGV)
246     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
247 #endif
248 #if !defined(PETSC_MISSING_SIGSYS)
249     signal(SIGSYS,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
250 #endif
251 #if !defined(PETSC_MISSING_SIGTERM)
252 #if !defined(OMPI_MAJOR_VERSION)
253     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
254     signal(SIGTERM,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
255 #endif
256 #endif
257 #if !defined(PETSC_MISSING_SIGTRAP)
258     signal(SIGTRAP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
259 #endif
260 #if !defined(PETSC_MISSING_SIGTSTP)
261     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
262 #endif
263 #if !defined(PETSC_MISSING_SIGURG)
264     signal(SIGURG,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
265 #endif
266 #if !defined(PETSC_MISSING_SIGUSR1)
267     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
268 #endif
269 #if !defined(PETSC_MISSING_SIGUSR2)
270     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
271 #endif
272     SignalSet = PETSC_TRUE;
273   }
274   if (!routine) {
275 #if !defined(PETSC_MISSING_SIGALRM)
276     /* signal(SIGALRM, SIG_DFL); */
277 #endif
278 #if !defined(PETSC_MISSING_SIGBUS)
279     signal(SIGBUS,  SIG_DFL);
280 #endif
281 #if !defined(PETSC_MISSING_SIGCONT)
282     /* signal(SIGCONT, SIG_DFL); */
283 #endif
284 #if !defined(PETSC_MISSING_SIGFPE)
285     signal(SIGFPE,  SIG_DFL);
286 #endif
287 #if !defined(PETSC_MISSING_SIGHUP)
288     signal(SIGHUP,  SIG_DFL);
289 #endif
290 #if !defined(PETSC_MISSING_SIGILL)
291     signal(SIGILL,  SIG_DFL);
292 #endif
293 #if !defined(PETSC_MISSING_SIGINT)
294     /* signal(SIGINT,  SIG_DFL); */
295 #endif
296 #if !defined(PETSC_MISSING_SIGPIPE)
297     signal(SIGPIPE, SIG_DFL);
298 #endif
299 #if !defined(PETSC_MISSING_SIGQUIT)
300     signal(SIGQUIT, SIG_DFL);
301 #endif
302 #if !defined(PETSC_MISSING_SIGSEGV)
303     signal(SIGSEGV, SIG_DFL);
304 #endif
305 #if !defined(PETSC_MISSING_SIGSYS)
306     signal(SIGSYS,  SIG_DFL);
307 #endif
308 #if !defined(PETSC_MISSING_SIGTERM)
309     signal(SIGTERM, SIG_DFL);
310 #endif
311 #if !defined(PETSC_MISSING_SIGTRAP)
312     signal(SIGTRAP, SIG_DFL);
313 #endif
314 #if !defined(PETSC_MISSING_SIGTSTP)
315     /* signal(SIGTSTP, SIG_DFL); */
316 #endif
317 #if !defined(PETSC_MISSING_SIGURG)
318     signal(SIGURG,  SIG_DFL);
319 #endif
320 #if !defined(PETSC_MISSING_SIGUSR1)
321     /* signal(SIGUSR1, SIG_DFL); */
322 #endif
323 #if !defined(PETSC_MISSING_SIGUSR2)
324     /* signal(SIGUSR2, SIG_DFL); */
325 #endif
326     SignalSet = PETSC_FALSE;
327   }
328   PetscCall(PetscNew(&newsh));
329   if (sh) {
330     PetscCheck(sh->classid == SIGNAL_CLASSID,PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
331     newsh->previous = sh;
332   }  else newsh->previous = NULL;
333   newsh->handler = routine;
334   newsh->ctx     = ctx;
335   newsh->classid = SIGNAL_CLASSID;
336   sh             = newsh;
337   PetscFunctionReturn(0);
338 }
339 
340 /*@
341    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
342        If no signal handlers are left on the stack it will remove the PETSc signal handler.
343        (That is PETSc will no longer catch signals).
344 
345    Not Collective
346 
347   Level: developer
348 
349 .seealso: `PetscPushSignalHandler()`
350 
351 @*/
352 PetscErrorCode  PetscPopSignalHandler(void)
353 {
354   struct SH      *tmp;
355 
356   PetscFunctionBegin;
357   if (!sh) PetscFunctionReturn(0);
358   PetscCheck(sh->classid == SIGNAL_CLASSID,PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
359 
360   tmp = sh;
361   sh  = sh->previous;
362   PetscCall(PetscFree(tmp));
363   if (!sh || !sh->handler) {
364 #if !defined(PETSC_MISSING_SIGALRM)
365     /* signal(SIGALRM, SIG_DFL); */
366 #endif
367 #if !defined(PETSC_MISSING_SIGBUS)
368     signal(SIGBUS,  SIG_DFL);
369 #endif
370 #if !defined(PETSC_MISSING_SIGCONT)
371     /* signal(SIGCONT, SIG_DFL); */
372 #endif
373 #if !defined(PETSC_MISSING_SIGFPE)
374     signal(SIGFPE,  SIG_DFL);
375 #endif
376 #if !defined(PETSC_MISSING_SIGHUP)
377     signal(SIGHUP,  SIG_DFL);
378 #endif
379 #if !defined(PETSC_MISSING_SIGILL)
380     signal(SIGILL,  SIG_DFL);
381 #endif
382 #if !defined(PETSC_MISSING_SIGINT)
383     /* signal(SIGINT,  SIG_DFL); */
384 #endif
385 #if !defined(PETSC_MISSING_SIGPIPE)
386     signal(SIGPIPE, SIG_DFL);
387 #endif
388 #if !defined(PETSC_MISSING_SIGQUIT)
389     signal(SIGQUIT, SIG_DFL);
390 #endif
391 #if !defined(PETSC_MISSING_SIGSEGV)
392     signal(SIGSEGV, SIG_DFL);
393 #endif
394 #if !defined(PETSC_MISSING_SIGSYS)
395     signal(SIGSYS,  SIG_DFL);
396 #endif
397 #if !defined(PETSC_MISSING_SIGTERM)
398     signal(SIGTERM, SIG_DFL);
399 #endif
400 #if !defined(PETSC_MISSING_SIGTRAP)
401     signal(SIGTRAP, SIG_DFL);
402 #endif
403 #if !defined(PETSC_MISSING_SIGTSTP)
404     /* signal(SIGTSTP, SIG_DFL); */
405 #endif
406 #if !defined(PETSC_MISSING_SIGURG)
407     signal(SIGURG,  SIG_DFL);
408 #endif
409 #if !defined(PETSC_MISSING_SIGUSR1)
410     /* signal(SIGUSR1, SIG_DFL); */
411 #endif
412 #if !defined(PETSC_MISSING_SIGUSR2)
413     /* signal(SIGUSR2, SIG_DFL); */
414 #endif
415     SignalSet = PETSC_FALSE;
416   } else {
417     SignalSet = PETSC_TRUE;
418   }
419   PetscFunctionReturn(0);
420 }
421