xref: /petsc/src/sys/error/signal.c (revision 2fa40bb9206b96114faa7cb222621ec184d31cd2)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h>             /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId   classid;
14   PetscErrorCode (*handler)(int,void*);
15   void           *ctx;
16   struct SH      *previous;
17 };
18 static struct SH *sh       = NULL;
19 static PetscBool SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig,int code,struct sigcontext * scp,char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   PetscFunctionBegin;
49   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig,(void*)0);
50   else {
51     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_COR,"Signal object has been corrupted");
52     ierr = (*sh->handler)(sig,sh->ctx);
53   }
54   if (ierr) PETSCABORT(PETSC_COMM_WORLD,PETSC_ERR_COR);
55 }
56 
57 /*@
58    PetscSignalHandlerDefault - Default signal handler.
59 
60    Not Collective
61 
62    Level: advanced
63 
64    Input Parameters:
65 +  sig - signal value
66 -  ptr - unused pointer
67 
68 @*/
69 PetscErrorCode  PetscSignalHandlerDefault(int sig,void *ptr)
70 {
71   PetscErrorCode ierr;
72   const char     *SIGNAME[64];
73 
74   PetscFunctionBegin;
75   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
76   SIGNAME[0]       = "Unknown signal";
77 #if !defined(PETSC_MISSING_SIGABRT)
78   SIGNAME[SIGABRT] = "Abort";
79 #endif
80 #if !defined(PETSC_MISSING_SIGALRM)
81   SIGNAME[SIGALRM] = "Alarm";
82 #endif
83 #if !defined(PETSC_MISSING_SIGBUS)
84   SIGNAME[SIGBUS]  = "BUS: Bus Error, possibly illegal memory access";
85 #endif
86 #if !defined(PETSC_MISSING_SIGCHLD)
87   SIGNAME[SIGCHLD] = "CHLD";
88 #endif
89 #if !defined(PETSC_MISSING_SIGCONT)
90   SIGNAME[SIGCONT] = "CONT";
91 #endif
92 #if !defined(PETSC_MISSING_SIGFPE)
93   SIGNAME[SIGFPE]  = "FPE: Floating Point Exception,probably divide by zero";
94 #endif
95 #if !defined(PETSC_MISSING_SIGHUP)
96   SIGNAME[SIGHUP]  = "Hang up: Some other process (or the batch system) has told this process to end";
97 #endif
98 #if !defined(PETSC_MISSING_SIGILL)
99   SIGNAME[SIGILL]  = "Illegal instruction: Likely due to memory corruption";
100 #endif
101 #if !defined(PETSC_MISSING_SIGINT)
102   SIGNAME[SIGINT]  = "Interrupt";
103 #endif
104 #if !defined(PETSC_MISSING_SIGKILL)
105   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
106 #endif
107 #if !defined(PETSC_MISSING_SIGPIPE)
108   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
109 #endif
110 #if !defined(PETSC_MISSING_SIGQUIT)
111   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
112 #endif
113 #if !defined(PETSC_MISSING_SIGSEGV)
114   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
115 #endif
116 #if !defined(PETSC_MISSING_SIGSYS)
117   SIGNAME[SIGSYS]  = "SYS";
118 #endif
119 #if !defined(PETSC_MISSING_SIGTERM)
120   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
121 #endif
122 #if !defined(PETSC_MISSING_SIGTRAP)
123   SIGNAME[SIGTRAP] = "TRAP";
124 #endif
125 #if !defined(PETSC_MISSING_SIGTSTP)
126   SIGNAME[SIGTSTP] = "TSTP";
127 #endif
128 #if !defined(PETSC_MISSING_SIGURG)
129   SIGNAME[SIGURG]  = "URG";
130 #endif
131 #if !defined(PETSC_MISSING_SIGUSR1)
132   SIGNAME[SIGUSR1] = "User 1";
133 #endif
134 #if !defined(PETSC_MISSING_SIGUSR2)
135   SIGNAME[SIGUSR2] = "User 2";
136 #endif
137 
138   signal(sig,SIG_DFL);
139   (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
140   if (sig >= 0 && sig <= 20) (*PetscErrorPrintf)("Caught signal number %d %s\n",sig,SIGNAME[sig]);
141   else (*PetscErrorPrintf)("Caught signal\n");
142 
143   (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
144   (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind\n");
145   (*PetscErrorPrintf)("or try http://valgrind.org on GNU/linux and Apple Mac OS X to find memory corruption errors\n");
146 #if defined(PETSC_HAVE_CUDA)
147   (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems  to find memory corruption errors\n");
148 #endif
149 #if PetscDefined(USE_DEBUG)
150   PetscStackPop;  /* remove stack frames for error handlers */
151   PetscStackPop;
152   (*PetscErrorPrintf)("likely location of problem given in stack below\n");
153   (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
154   PetscStackView(PETSC_STDOUT);
155 #else
156   (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
157   (*PetscErrorPrintf)("to get more information on the crash.\n");
158 #endif
159   ierr =  PetscError(PETSC_COMM_SELF,0,"User provided function","unknown file",PETSC_ERR_SIG,PETSC_ERROR_INITIAL,NULL);
160 #if !defined(PETSC_MISSING_SIGBUS)
161   if (sig == SIGSEGV || sig == SIGBUS) {
162 #else
163   if (sig == SIGSEGV) {
164 #endif
165     PetscBool debug;
166 
167     PetscMallocGetDebug(&debug,NULL,NULL);
168     if (debug) {
169       (*PetscErrorPrintf)("Checking the memory for corruption.\n");
170       PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
171     } else {
172       (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
173     }
174   }
175   atexit(MyExit);
176   PETSCABORT(PETSC_COMM_WORLD,(int)ierr);
177   PetscFunctionReturn(0);
178 }
179 
/* PETSC_SIGNAL_CAST is written in front of the handler in the signal() calls of this file.
   It expands to nothing unless it has already been defined elsewhere
   (presumably by the build configuration for systems whose signal() needs a cast -- TODO confirm). */
#ifndef PETSC_SIGNAL_CAST
#define PETSC_SIGNAL_CAST
#endif
183 
184 /*@C
185    PetscPushSignalHandler - Catches the usual fatal errors and
186    calls a user-provided routine.
187 
188    Not Collective
189 
190    Input Parameters:
191 +  routine - routine to call when a signal is received
192 -  ctx - optional context needed by the routine
193 
194   Level: developer
195 
196 .seealso: PetscPopSignalHandler(), PetscSignalHandlerDefault(), PetscPushErrorHandler()
197 
198 @*/
199 PetscErrorCode  PetscPushSignalHandler(PetscErrorCode (*routine)(int,void*),void *ctx)
200 {
201   struct  SH     *newsh;
202   PetscErrorCode ierr;
203 
204   PetscFunctionBegin;
205   if (!SIGNAL_CLASSID) {
206     /* ierr = PetscClassIdRegister("Signal",&SIGNAL_CLASSID);CHKERRQ(ierr); */
207     SIGNAL_CLASSID = 19;
208   }
209   if (!SignalSet && routine) {
210     /* Do not catch ABRT, CHLD, KILL */
211 #if !defined(PETSC_MISSING_SIGALRM)
212     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
213 #endif
214 #if !defined(PETSC_MISSING_SIGBUS)
215     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
216 #endif
217 #if !defined(PETSC_MISSING_SIGCONT)
218     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
219 #endif
220 #if !defined(PETSC_MISSING_SIGFPE)
221     signal(SIGFPE,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
222 #endif
223 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
224     {
225       struct  sigaction action;
226       sigaction(SIGHUP,NULL,&action);
227       if (action.sa_handler == SIG_IGN) {
228         ierr = PetscInfo(NULL,"SIGHUP previously set to ignore, therefor not changing its signal handler\n");CHKERRQ(ierr);
229       } else {
230         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
231       }
232     }
233 #endif
234 #if !defined(PETSC_MISSING_SIGILL)
235     signal(SIGILL,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
236 #endif
237 #if !defined(PETSC_MISSING_SIGINT)
238     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
239 #endif
240 #if !defined(PETSC_MISSING_SIGPIPE)
241     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
242 #endif
243 #if !defined(PETSC_MISSING_SIGQUIT)
244     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
245 #endif
246 #if !defined(PETSC_MISSING_SIGSEGV)
247     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
248 #endif
249 #if !defined(PETSC_MISSING_SIGSYS)
250     signal(SIGSYS,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
251 #endif
252 #if !defined(PETSC_MISSING_SIGTERM)
253 #if !defined(OMPI_MAJOR_VERSION)
254     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
255     signal(SIGTERM,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
256 #endif
257 #endif
258 #if !defined(PETSC_MISSING_SIGTRAP)
259     signal(SIGTRAP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
260 #endif
261 #if !defined(PETSC_MISSING_SIGTSTP)
262     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
263 #endif
264 #if !defined(PETSC_MISSING_SIGURG)
265     signal(SIGURG,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
266 #endif
267 #if !defined(PETSC_MISSING_SIGUSR1)
268     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
269 #endif
270 #if !defined(PETSC_MISSING_SIGUSR2)
271     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
272 #endif
273     SignalSet = PETSC_TRUE;
274   }
275   if (!routine) {
276 #if !defined(PETSC_MISSING_SIGALRM)
277     /* signal(SIGALRM, SIG_DFL); */
278 #endif
279 #if !defined(PETSC_MISSING_SIGBUS)
280     signal(SIGBUS,  SIG_DFL);
281 #endif
282 #if !defined(PETSC_MISSING_SIGCONT)
283     /* signal(SIGCONT, SIG_DFL); */
284 #endif
285 #if !defined(PETSC_MISSING_SIGFPE)
286     signal(SIGFPE,  SIG_DFL);
287 #endif
288 #if !defined(PETSC_MISSING_SIGHUP)
289     signal(SIGHUP,  SIG_DFL);
290 #endif
291 #if !defined(PETSC_MISSING_SIGILL)
292     signal(SIGILL,  SIG_DFL);
293 #endif
294 #if !defined(PETSC_MISSING_SIGINT)
295     /* signal(SIGINT,  SIG_DFL); */
296 #endif
297 #if !defined(PETSC_MISSING_SIGPIPE)
298     signal(SIGPIPE, SIG_DFL);
299 #endif
300 #if !defined(PETSC_MISSING_SIGQUIT)
301     signal(SIGQUIT, SIG_DFL);
302 #endif
303 #if !defined(PETSC_MISSING_SIGSEGV)
304     signal(SIGSEGV, SIG_DFL);
305 #endif
306 #if !defined(PETSC_MISSING_SIGSYS)
307     signal(SIGSYS,  SIG_DFL);
308 #endif
309 #if !defined(PETSC_MISSING_SIGTERM)
310     signal(SIGTERM, SIG_DFL);
311 #endif
312 #if !defined(PETSC_MISSING_SIGTRAP)
313     signal(SIGTRAP, SIG_DFL);
314 #endif
315 #if !defined(PETSC_MISSING_SIGTSTP)
316     /* signal(SIGTSTP, SIG_DFL); */
317 #endif
318 #if !defined(PETSC_MISSING_SIGURG)
319     signal(SIGURG,  SIG_DFL);
320 #endif
321 #if !defined(PETSC_MISSING_SIGUSR1)
322     /* signal(SIGUSR1, SIG_DFL); */
323 #endif
324 #if !defined(PETSC_MISSING_SIGUSR2)
325     /* signal(SIGUSR2, SIG_DFL); */
326 #endif
327     SignalSet = PETSC_FALSE;
328   }
329   ierr = PetscNew(&newsh);CHKERRQ(ierr);
330   if (sh) {
331     if (sh->classid != SIGNAL_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
332     newsh->previous = sh;
333   }  else newsh->previous = NULL;
334   newsh->handler = routine;
335   newsh->ctx     = ctx;
336   newsh->classid = SIGNAL_CLASSID;
337   sh             = newsh;
338   PetscFunctionReturn(0);
339 }
340 
341 /*@
342    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
343        If no signal handlers are left on the stack it will remove the PETSc signal handler.
344        (That is PETSc will no longer catch signals).
345 
346    Not Collective
347 
348   Level: developer
349 
350 .seealso: PetscPushSignalHandler()
351 
352 @*/
353 PetscErrorCode  PetscPopSignalHandler(void)
354 {
355   struct SH      *tmp;
356   PetscErrorCode ierr;
357 
358   PetscFunctionBegin;
359   if (!sh) PetscFunctionReturn(0);
360   if (sh->classid != SIGNAL_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
361 
362   tmp = sh;
363   sh  = sh->previous;
364   ierr = PetscFree(tmp);CHKERRQ(ierr);
365   if (!sh || !sh->handler) {
366 #if !defined(PETSC_MISSING_SIGALRM)
367     /* signal(SIGALRM, SIG_DFL); */
368 #endif
369 #if !defined(PETSC_MISSING_SIGBUS)
370     signal(SIGBUS,  SIG_DFL);
371 #endif
372 #if !defined(PETSC_MISSING_SIGCONT)
373     /* signal(SIGCONT, SIG_DFL); */
374 #endif
375 #if !defined(PETSC_MISSING_SIGFPE)
376     signal(SIGFPE,  SIG_DFL);
377 #endif
378 #if !defined(PETSC_MISSING_SIGHUP)
379     signal(SIGHUP,  SIG_DFL);
380 #endif
381 #if !defined(PETSC_MISSING_SIGILL)
382     signal(SIGILL,  SIG_DFL);
383 #endif
384 #if !defined(PETSC_MISSING_SIGINT)
385     /* signal(SIGINT,  SIG_DFL); */
386 #endif
387 #if !defined(PETSC_MISSING_SIGPIPE)
388     signal(SIGPIPE, SIG_DFL);
389 #endif
390 #if !defined(PETSC_MISSING_SIGQUIT)
391     signal(SIGQUIT, SIG_DFL);
392 #endif
393 #if !defined(PETSC_MISSING_SIGSEGV)
394     signal(SIGSEGV, SIG_DFL);
395 #endif
396 #if !defined(PETSC_MISSING_SIGSYS)
397     signal(SIGSYS,  SIG_DFL);
398 #endif
399 #if !defined(PETSC_MISSING_SIGTERM)
400     signal(SIGTERM, SIG_DFL);
401 #endif
402 #if !defined(PETSC_MISSING_SIGTRAP)
403     signal(SIGTRAP, SIG_DFL);
404 #endif
405 #if !defined(PETSC_MISSING_SIGTSTP)
406     /* signal(SIGTSTP, SIG_DFL); */
407 #endif
408 #if !defined(PETSC_MISSING_SIGURG)
409     signal(SIGURG,  SIG_DFL);
410 #endif
411 #if !defined(PETSC_MISSING_SIGUSR1)
412     /* signal(SIGUSR1, SIG_DFL); */
413 #endif
414 #if !defined(PETSC_MISSING_SIGUSR2)
415     /* signal(SIGUSR2, SIG_DFL); */
416 #endif
417     SignalSet = PETSC_FALSE;
418   } else {
419     SignalSet = PETSC_TRUE;
420   }
421   PetscFunctionReturn(0);
422 }
423