xref: /petsc/src/sys/error/signal.c (revision 2e16c0ce58b3a4ec287cbc0a0807bfb0a0fa5ac9)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h>             /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId   classid;
14   PetscErrorCode (*handler)(int,void*);
15   void           *ctx;
16   struct SH      *previous;
17 };
18 static struct SH *sh       = NULL;
19 static PetscBool SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig,int code,struct sigcontext * scp,char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig,(void*)0);
49   else {
50     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_COR,"Signal object has been corrupted");
51     ierr = (*sh->handler)(sig,sh->ctx);
52   }
53   if (ierr) PETSCABORT(PETSC_COMM_WORLD,PETSC_ERR_COR);
54 }
55 
56 /*@
57    PetscSignalHandlerDefault - Default signal handler.
58 
59    Not Collective
60 
61    Level: advanced
62 
63    Input Parameters:
64 +  sig - signal value
65 -  ptr - unused pointer
66 
67 @*/
68 PetscErrorCode  PetscSignalHandlerDefault(int sig,void *ptr)
69 {
70   PetscErrorCode ierr;
71   const char     *SIGNAME[64];
72 
73   PetscFunctionBegin;
74   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
75   SIGNAME[0]       = "Unknown signal";
76 #if !defined(PETSC_MISSING_SIGABRT)
77   SIGNAME[SIGABRT] = "Abort";
78 #endif
79 #if !defined(PETSC_MISSING_SIGALRM)
80   SIGNAME[SIGALRM] = "Alarm";
81 #endif
82 #if !defined(PETSC_MISSING_SIGBUS)
83   SIGNAME[SIGBUS]  = "BUS: Bus Error, possibly illegal memory access";
84 #endif
85 #if !defined(PETSC_MISSING_SIGCHLD)
86   SIGNAME[SIGCHLD] = "CHLD";
87 #endif
88 #if !defined(PETSC_MISSING_SIGCONT)
89   SIGNAME[SIGCONT] = "CONT";
90 #endif
91 #if !defined(PETSC_MISSING_SIGFPE)
92   SIGNAME[SIGFPE]  = "FPE: Floating Point Exception,probably divide by zero";
93 #endif
94 #if !defined(PETSC_MISSING_SIGHUP)
95   SIGNAME[SIGHUP]  = "Hang up: Some other process (or the batch system) has told this process to end";
96 #endif
97 #if !defined(PETSC_MISSING_SIGILL)
98   SIGNAME[SIGILL]  = "Illegal instruction: Likely due to memory corruption";
99 #endif
100 #if !defined(PETSC_MISSING_SIGINT)
101   SIGNAME[SIGINT]  = "Interrupt";
102 #endif
103 #if !defined(PETSC_MISSING_SIGKILL)
104   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
105 #endif
106 #if !defined(PETSC_MISSING_SIGPIPE)
107   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
108 #endif
109 #if !defined(PETSC_MISSING_SIGQUIT)
110   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
111 #endif
112 #if !defined(PETSC_MISSING_SIGSEGV)
113   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
114 #endif
115 #if !defined(PETSC_MISSING_SIGSYS)
116   SIGNAME[SIGSYS]  = "SYS";
117 #endif
118 #if !defined(PETSC_MISSING_SIGTERM)
119   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
120 #endif
121 #if !defined(PETSC_MISSING_SIGTRAP)
122   SIGNAME[SIGTRAP] = "TRAP";
123 #endif
124 #if !defined(PETSC_MISSING_SIGTSTP)
125   SIGNAME[SIGTSTP] = "TSTP";
126 #endif
127 #if !defined(PETSC_MISSING_SIGURG)
128   SIGNAME[SIGURG]  = "URG";
129 #endif
130 #if !defined(PETSC_MISSING_SIGUSR1)
131   SIGNAME[SIGUSR1] = "User 1";
132 #endif
133 #if !defined(PETSC_MISSING_SIGUSR2)
134   SIGNAME[SIGUSR2] = "User 2";
135 #endif
136 
137   signal(sig,SIG_DFL);
138   (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
139   if (sig >= 0 && sig <= 20) (*PetscErrorPrintf)("Caught signal number %d %s\n",sig,SIGNAME[sig]);
140   else (*PetscErrorPrintf)("Caught signal\n");
141 
142   (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
143   (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind\n");
144   (*PetscErrorPrintf)("or try http://valgrind.org on GNU/linux and Apple MacOS to find memory corruption errors\n");
145 #if defined(PETSC_HAVE_CUDA)
146   (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
147 #endif
148 #if PetscDefined(USE_DEBUG)
149   PetscStackPop;  /* remove stack frames for error handlers */
150   PetscStackPop;
151   (*PetscErrorPrintf)("likely location of problem given in stack below\n");
152   (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
153   PetscStackView(PETSC_STDOUT);
154 #else
155   (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
156   (*PetscErrorPrintf)("to get more information on the crash.\n");
157 #endif
158   ierr =  PetscError(PETSC_COMM_SELF,0,NULL,NULL,PETSC_ERR_SIG,PETSC_ERROR_INITIAL,NULL);
159 #if !defined(PETSC_MISSING_SIGBUS)
160   if (sig == SIGSEGV || sig == SIGBUS) {
161 #else
162   if (sig == SIGSEGV) {
163 #endif
164     PetscBool debug;
165 
166     PetscMallocGetDebug(&debug,NULL,NULL);
167     if (debug) {
168       (*PetscErrorPrintf)("Checking the memory for corruption.\n");
169       PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
170     } else {
171       (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
172     }
173   }
174   atexit(MyExit);
175   PETSCABORT(PETSC_COMM_WORLD,(int)ierr);
176   PetscFunctionReturn(0);
177 }
178 
179 #if !defined(PETSC_SIGNAL_CAST)
180 #define PETSC_SIGNAL_CAST
181 #endif
182 
183 /*@C
184    PetscPushSignalHandler - Catches the usual fatal errors and
185    calls a user-provided routine.
186 
187    Not Collective
188 
189    Input Parameters:
190 +  routine - routine to call when a signal is received
191 -  ctx - optional context needed by the routine
192 
193   Level: developer
194 
195 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
196 
197 @*/
198 PetscErrorCode  PetscPushSignalHandler(PetscErrorCode (*routine)(int,void*),void *ctx)
199 {
200   struct  SH     *newsh;
201 
202   PetscFunctionBegin;
203   if (!SIGNAL_CLASSID) {
204     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
205     SIGNAL_CLASSID = 19;
206   }
207   if (!SignalSet && routine) {
208     /* Do not catch ABRT, CHLD, KILL */
209 #if !defined(PETSC_MISSING_SIGALRM)
210     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
211 #endif
212 #if !defined(PETSC_MISSING_SIGBUS)
213     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
214 #endif
215 #if !defined(PETSC_MISSING_SIGCONT)
216     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
217 #endif
218 #if !defined(PETSC_MISSING_SIGFPE)
219     signal(SIGFPE,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
220 #endif
221 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
222     {
223       struct  sigaction action;
224       sigaction(SIGHUP,NULL,&action);
225       if (action.sa_handler == SIG_IGN) {
226         PetscCall(PetscInfo(NULL,"SIGHUP previously set to ignore, therefor not changing its signal handler\n"));
227       } else {
228         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
229       }
230     }
231 #endif
232 #if !defined(PETSC_MISSING_SIGILL)
233     signal(SIGILL,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
234 #endif
235 #if !defined(PETSC_MISSING_SIGINT)
236     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
237 #endif
238 #if !defined(PETSC_MISSING_SIGPIPE)
239     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
240 #endif
241 #if !defined(PETSC_MISSING_SIGQUIT)
242     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
243 #endif
244 #if !defined(PETSC_MISSING_SIGSEGV)
245     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
246 #endif
247 #if !defined(PETSC_MISSING_SIGSYS)
248     signal(SIGSYS,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
249 #endif
250 #if !defined(PETSC_MISSING_SIGTERM)
251 #if !defined(OMPI_MAJOR_VERSION)
252     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
253     signal(SIGTERM,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
254 #endif
255 #endif
256 #if !defined(PETSC_MISSING_SIGTRAP)
257     signal(SIGTRAP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
258 #endif
259 #if !defined(PETSC_MISSING_SIGTSTP)
260     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
261 #endif
262 #if !defined(PETSC_MISSING_SIGURG)
263     signal(SIGURG,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
264 #endif
265 #if !defined(PETSC_MISSING_SIGUSR1)
266     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
267 #endif
268 #if !defined(PETSC_MISSING_SIGUSR2)
269     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
270 #endif
271     SignalSet = PETSC_TRUE;
272   }
273   if (!routine) {
274 #if !defined(PETSC_MISSING_SIGALRM)
275     /* signal(SIGALRM, SIG_DFL); */
276 #endif
277 #if !defined(PETSC_MISSING_SIGBUS)
278     signal(SIGBUS,  SIG_DFL);
279 #endif
280 #if !defined(PETSC_MISSING_SIGCONT)
281     /* signal(SIGCONT, SIG_DFL); */
282 #endif
283 #if !defined(PETSC_MISSING_SIGFPE)
284     signal(SIGFPE,  SIG_DFL);
285 #endif
286 #if !defined(PETSC_MISSING_SIGHUP)
287     signal(SIGHUP,  SIG_DFL);
288 #endif
289 #if !defined(PETSC_MISSING_SIGILL)
290     signal(SIGILL,  SIG_DFL);
291 #endif
292 #if !defined(PETSC_MISSING_SIGINT)
293     /* signal(SIGINT,  SIG_DFL); */
294 #endif
295 #if !defined(PETSC_MISSING_SIGPIPE)
296     signal(SIGPIPE, SIG_DFL);
297 #endif
298 #if !defined(PETSC_MISSING_SIGQUIT)
299     signal(SIGQUIT, SIG_DFL);
300 #endif
301 #if !defined(PETSC_MISSING_SIGSEGV)
302     signal(SIGSEGV, SIG_DFL);
303 #endif
304 #if !defined(PETSC_MISSING_SIGSYS)
305     signal(SIGSYS,  SIG_DFL);
306 #endif
307 #if !defined(PETSC_MISSING_SIGTERM)
308     signal(SIGTERM, SIG_DFL);
309 #endif
310 #if !defined(PETSC_MISSING_SIGTRAP)
311     signal(SIGTRAP, SIG_DFL);
312 #endif
313 #if !defined(PETSC_MISSING_SIGTSTP)
314     /* signal(SIGTSTP, SIG_DFL); */
315 #endif
316 #if !defined(PETSC_MISSING_SIGURG)
317     signal(SIGURG,  SIG_DFL);
318 #endif
319 #if !defined(PETSC_MISSING_SIGUSR1)
320     /* signal(SIGUSR1, SIG_DFL); */
321 #endif
322 #if !defined(PETSC_MISSING_SIGUSR2)
323     /* signal(SIGUSR2, SIG_DFL); */
324 #endif
325     SignalSet = PETSC_FALSE;
326   }
327   PetscCall(PetscNew(&newsh));
328   if (sh) {
329     PetscCheck(sh->classid == SIGNAL_CLASSID,PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
330     newsh->previous = sh;
331   }  else newsh->previous = NULL;
332   newsh->handler = routine;
333   newsh->ctx     = ctx;
334   newsh->classid = SIGNAL_CLASSID;
335   sh             = newsh;
336   PetscFunctionReturn(0);
337 }
338 
339 /*@
340    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
341        If no signal handlers are left on the stack it will remove the PETSc signal handler.
342        (That is PETSc will no longer catch signals).
343 
344    Not Collective
345 
346   Level: developer
347 
348 .seealso: `PetscPushSignalHandler()`
349 
350 @*/
351 PetscErrorCode  PetscPopSignalHandler(void)
352 {
353   struct SH      *tmp;
354 
355   PetscFunctionBegin;
356   if (!sh) PetscFunctionReturn(0);
357   PetscCheck(sh->classid == SIGNAL_CLASSID,PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
358 
359   tmp = sh;
360   sh  = sh->previous;
361   PetscCall(PetscFree(tmp));
362   if (!sh || !sh->handler) {
363 #if !defined(PETSC_MISSING_SIGALRM)
364     /* signal(SIGALRM, SIG_DFL); */
365 #endif
366 #if !defined(PETSC_MISSING_SIGBUS)
367     signal(SIGBUS,  SIG_DFL);
368 #endif
369 #if !defined(PETSC_MISSING_SIGCONT)
370     /* signal(SIGCONT, SIG_DFL); */
371 #endif
372 #if !defined(PETSC_MISSING_SIGFPE)
373     signal(SIGFPE,  SIG_DFL);
374 #endif
375 #if !defined(PETSC_MISSING_SIGHUP)
376     signal(SIGHUP,  SIG_DFL);
377 #endif
378 #if !defined(PETSC_MISSING_SIGILL)
379     signal(SIGILL,  SIG_DFL);
380 #endif
381 #if !defined(PETSC_MISSING_SIGINT)
382     /* signal(SIGINT,  SIG_DFL); */
383 #endif
384 #if !defined(PETSC_MISSING_SIGPIPE)
385     signal(SIGPIPE, SIG_DFL);
386 #endif
387 #if !defined(PETSC_MISSING_SIGQUIT)
388     signal(SIGQUIT, SIG_DFL);
389 #endif
390 #if !defined(PETSC_MISSING_SIGSEGV)
391     signal(SIGSEGV, SIG_DFL);
392 #endif
393 #if !defined(PETSC_MISSING_SIGSYS)
394     signal(SIGSYS,  SIG_DFL);
395 #endif
396 #if !defined(PETSC_MISSING_SIGTERM)
397     signal(SIGTERM, SIG_DFL);
398 #endif
399 #if !defined(PETSC_MISSING_SIGTRAP)
400     signal(SIGTRAP, SIG_DFL);
401 #endif
402 #if !defined(PETSC_MISSING_SIGTSTP)
403     /* signal(SIGTSTP, SIG_DFL); */
404 #endif
405 #if !defined(PETSC_MISSING_SIGURG)
406     signal(SIGURG,  SIG_DFL);
407 #endif
408 #if !defined(PETSC_MISSING_SIGUSR1)
409     /* signal(SIGUSR1, SIG_DFL); */
410 #endif
411 #if !defined(PETSC_MISSING_SIGUSR2)
412     /* signal(SIGUSR2, SIG_DFL); */
413 #endif
414     SignalSet = PETSC_FALSE;
415   } else {
416     SignalSet = PETSC_TRUE;
417   }
418   PetscFunctionReturn(0);
419 }
420