xref: /petsc/src/sys/error/signal.c (revision 24ded41b4e3afbef0dd5eaa1b3d8dd0172f6dba2)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h>             /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId   classid;
14   PetscErrorCode (*handler)(int,void*);
15   void           *ctx;
16   struct SH      *previous;
17 };
18 static struct SH *sh       = NULL;
19 static PetscBool SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig,int code,struct sigcontext * scp,char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig,(void*)0);
49   else {
50     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_COR,"Signal object has been corrupted");
51     ierr = (*sh->handler)(sig,sh->ctx);
52   }
53   if (ierr) PETSCABORT(PETSC_COMM_WORLD,PETSC_ERR_COR);
54 }
55 
56 /*@
57    PetscSignalHandlerDefault - Default signal handler.
58 
59    Not Collective
60 
61    Input Parameters:
62 +  sig - signal value
63 -  ptr - unused pointer
64 
65    Developer Note:
66    This does not call PetscError(), handles the entire error process directly
67 
68    Level: advanced
69 
70 @*/
71 PetscErrorCode  PetscSignalHandlerDefault(int sig,void *ptr)
72 {
73   const char *SIGNAME[64];
74 
75   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
76   SIGNAME[0]       = "Unknown signal";
77 #if !defined(PETSC_MISSING_SIGABRT)
78   SIGNAME[SIGABRT] = "Abort";
79 #endif
80 #if !defined(PETSC_MISSING_SIGALRM)
81   SIGNAME[SIGALRM] = "Alarm";
82 #endif
83 #if !defined(PETSC_MISSING_SIGBUS)
84   SIGNAME[SIGBUS]  = "BUS: Bus Error, possibly illegal memory access";
85 #endif
86 #if !defined(PETSC_MISSING_SIGCHLD)
87   SIGNAME[SIGCHLD] = "CHLD";
88 #endif
89 #if !defined(PETSC_MISSING_SIGCONT)
90   SIGNAME[SIGCONT] = "CONT";
91 #endif
92 #if !defined(PETSC_MISSING_SIGFPE)
93   SIGNAME[SIGFPE]  = "FPE: Floating Point Exception,probably divide by zero";
94 #endif
95 #if !defined(PETSC_MISSING_SIGHUP)
96   SIGNAME[SIGHUP]  = "Hang up: Some other process (or the batch system) has told this process to end";
97 #endif
98 #if !defined(PETSC_MISSING_SIGILL)
99   SIGNAME[SIGILL]  = "Illegal instruction: Likely due to memory corruption";
100 #endif
101 #if !defined(PETSC_MISSING_SIGINT)
102   SIGNAME[SIGINT]  = "Interrupt";
103 #endif
104 #if !defined(PETSC_MISSING_SIGKILL)
105   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
106 #endif
107 #if !defined(PETSC_MISSING_SIGPIPE)
108   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
109 #endif
110 #if !defined(PETSC_MISSING_SIGQUIT)
111   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
112 #endif
113 #if !defined(PETSC_MISSING_SIGSEGV)
114   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
115 #endif
116 #if !defined(PETSC_MISSING_SIGSYS)
117   SIGNAME[SIGSYS]  = "SYS";
118 #endif
119 #if !defined(PETSC_MISSING_SIGTERM)
120   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
121 #endif
122 #if !defined(PETSC_MISSING_SIGTRAP)
123   SIGNAME[SIGTRAP] = "TRAP";
124 #endif
125 #if !defined(PETSC_MISSING_SIGTSTP)
126   SIGNAME[SIGTSTP] = "TSTP";
127 #endif
128 #if !defined(PETSC_MISSING_SIGURG)
129   SIGNAME[SIGURG]  = "URG";
130 #endif
131 #if !defined(PETSC_MISSING_SIGUSR1)
132   SIGNAME[SIGUSR1] = "User 1";
133 #endif
134 #if !defined(PETSC_MISSING_SIGUSR2)
135   SIGNAME[SIGUSR2] = "User 2";
136 #endif
137 
138   signal(sig,SIG_DFL);
139   PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
140   (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
141   if (sig >= 0 && sig <= 20) (*PetscErrorPrintf)("Caught signal number %d %s\n",sig,SIGNAME[sig]);
142   else (*PetscErrorPrintf)("Caught signal\n");
143 
144   (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
145   (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
146 #if defined(PETSC_HAVE_CUDA)
147   (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
148 #endif
149 #if PetscDefined(USE_DEBUG)
150   (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
151   PetscStackView(PETSC_STDOUT);
152 #else
153   (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
154   (*PetscErrorPrintf)("to get more information on the crash.\n");
155 #endif
156 #if !defined(PETSC_MISSING_SIGBUS)
157   if (sig == SIGSEGV || sig == SIGBUS) {
158 #else
159   if (sig == SIGSEGV) {
160 #endif
161     PetscBool debug;
162 
163     PetscMallocGetDebug(&debug,NULL,NULL);
164     if (debug) PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
165     else (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
166   }
167   atexit(MyExit);
168   PETSCABORT(PETSC_COMM_WORLD,(int)PETSC_ERR_SIG);
169   return 0;
170 }
171 
172 #if !defined(PETSC_SIGNAL_CAST)
173 #define PETSC_SIGNAL_CAST
174 #endif
175 
176 /*@C
177    PetscPushSignalHandler - Catches the usual fatal errors and
178    calls a user-provided routine.
179 
180    Not Collective
181 
182    Input Parameters:
183 +  routine - routine to call when a signal is received
184 -  ctx - optional context needed by the routine
185 
186   Level: developer
187 
188 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
189 
190 @*/
191 PetscErrorCode  PetscPushSignalHandler(PetscErrorCode (*routine)(int,void*),void *ctx)
192 {
193   struct  SH     *newsh;
194 
195   PetscFunctionBegin;
196   if (!SIGNAL_CLASSID) {
197     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
198     SIGNAL_CLASSID = 19;
199   }
200   if (!SignalSet && routine) {
201     /* Do not catch ABRT, CHLD, KILL */
202 #if !defined(PETSC_MISSING_SIGALRM)
203     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
204 #endif
205 #if !defined(PETSC_MISSING_SIGBUS)
206     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
207 #endif
208 #if !defined(PETSC_MISSING_SIGCONT)
209     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
210 #endif
211 #if !defined(PETSC_MISSING_SIGFPE)
212     signal(SIGFPE,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
213 #endif
214 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
215     {
216       struct  sigaction action;
217       sigaction(SIGHUP,NULL,&action);
218       if (action.sa_handler == SIG_IGN) {
219         PetscCall(PetscInfo(NULL,"SIGHUP previously set to ignore, therefor not changing its signal handler\n"));
220       } else {
221         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
222       }
223     }
224 #endif
225 #if !defined(PETSC_MISSING_SIGILL)
226     signal(SIGILL,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
227 #endif
228 #if !defined(PETSC_MISSING_SIGINT)
229     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
230 #endif
231 #if !defined(PETSC_MISSING_SIGPIPE)
232     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
233 #endif
234 #if !defined(PETSC_MISSING_SIGQUIT)
235     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
236 #endif
237 #if !defined(PETSC_MISSING_SIGSEGV)
238     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
239 #endif
240 #if !defined(PETSC_MISSING_SIGSYS)
241     signal(SIGSYS,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
242 #endif
243 #if !defined(PETSC_MISSING_SIGTERM)
244 #if !defined(OMPI_MAJOR_VERSION)
245     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
246     signal(SIGTERM,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
247 #endif
248 #endif
249 #if !defined(PETSC_MISSING_SIGTRAP)
250     signal(SIGTRAP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
251 #endif
252 #if !defined(PETSC_MISSING_SIGTSTP)
253     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
254 #endif
255 #if !defined(PETSC_MISSING_SIGURG)
256     signal(SIGURG,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
257 #endif
258 #if !defined(PETSC_MISSING_SIGUSR1)
259     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
260 #endif
261 #if !defined(PETSC_MISSING_SIGUSR2)
262     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
263 #endif
264     SignalSet = PETSC_TRUE;
265   }
266   if (!routine) {
267 #if !defined(PETSC_MISSING_SIGALRM)
268     /* signal(SIGALRM, SIG_DFL); */
269 #endif
270 #if !defined(PETSC_MISSING_SIGBUS)
271     signal(SIGBUS,  SIG_DFL);
272 #endif
273 #if !defined(PETSC_MISSING_SIGCONT)
274     /* signal(SIGCONT, SIG_DFL); */
275 #endif
276 #if !defined(PETSC_MISSING_SIGFPE)
277     signal(SIGFPE,  SIG_DFL);
278 #endif
279 #if !defined(PETSC_MISSING_SIGHUP)
280     signal(SIGHUP,  SIG_DFL);
281 #endif
282 #if !defined(PETSC_MISSING_SIGILL)
283     signal(SIGILL,  SIG_DFL);
284 #endif
285 #if !defined(PETSC_MISSING_SIGINT)
286     /* signal(SIGINT,  SIG_DFL); */
287 #endif
288 #if !defined(PETSC_MISSING_SIGPIPE)
289     signal(SIGPIPE, SIG_DFL);
290 #endif
291 #if !defined(PETSC_MISSING_SIGQUIT)
292     signal(SIGQUIT, SIG_DFL);
293 #endif
294 #if !defined(PETSC_MISSING_SIGSEGV)
295     signal(SIGSEGV, SIG_DFL);
296 #endif
297 #if !defined(PETSC_MISSING_SIGSYS)
298     signal(SIGSYS,  SIG_DFL);
299 #endif
300 #if !defined(PETSC_MISSING_SIGTERM)
301     signal(SIGTERM, SIG_DFL);
302 #endif
303 #if !defined(PETSC_MISSING_SIGTRAP)
304     signal(SIGTRAP, SIG_DFL);
305 #endif
306 #if !defined(PETSC_MISSING_SIGTSTP)
307     /* signal(SIGTSTP, SIG_DFL); */
308 #endif
309 #if !defined(PETSC_MISSING_SIGURG)
310     signal(SIGURG,  SIG_DFL);
311 #endif
312 #if !defined(PETSC_MISSING_SIGUSR1)
313     /* signal(SIGUSR1, SIG_DFL); */
314 #endif
315 #if !defined(PETSC_MISSING_SIGUSR2)
316     /* signal(SIGUSR2, SIG_DFL); */
317 #endif
318     SignalSet = PETSC_FALSE;
319   }
320   PetscCall(PetscNew(&newsh));
321   if (sh) {
322     PetscCheck(sh->classid == SIGNAL_CLASSID,PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
323     newsh->previous = sh;
324   }  else newsh->previous = NULL;
325   newsh->handler = routine;
326   newsh->ctx     = ctx;
327   newsh->classid = SIGNAL_CLASSID;
328   sh             = newsh;
329   PetscFunctionReturn(0);
330 }
331 
332 /*@
333    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
334        If no signal handlers are left on the stack it will remove the PETSc signal handler.
335        (That is PETSc will no longer catch signals).
336 
337    Not Collective
338 
339   Level: developer
340 
341 .seealso: `PetscPushSignalHandler()`
342 
343 @*/
344 PetscErrorCode  PetscPopSignalHandler(void)
345 {
346   struct SH      *tmp;
347 
348   PetscFunctionBegin;
349   if (!sh) PetscFunctionReturn(0);
350   PetscCheck(sh->classid == SIGNAL_CLASSID,PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
351 
352   tmp = sh;
353   sh  = sh->previous;
354   PetscCall(PetscFree(tmp));
355   if (!sh || !sh->handler) {
356 #if !defined(PETSC_MISSING_SIGALRM)
357     /* signal(SIGALRM, SIG_DFL); */
358 #endif
359 #if !defined(PETSC_MISSING_SIGBUS)
360     signal(SIGBUS,  SIG_DFL);
361 #endif
362 #if !defined(PETSC_MISSING_SIGCONT)
363     /* signal(SIGCONT, SIG_DFL); */
364 #endif
365 #if !defined(PETSC_MISSING_SIGFPE)
366     signal(SIGFPE,  SIG_DFL);
367 #endif
368 #if !defined(PETSC_MISSING_SIGHUP)
369     signal(SIGHUP,  SIG_DFL);
370 #endif
371 #if !defined(PETSC_MISSING_SIGILL)
372     signal(SIGILL,  SIG_DFL);
373 #endif
374 #if !defined(PETSC_MISSING_SIGINT)
375     /* signal(SIGINT,  SIG_DFL); */
376 #endif
377 #if !defined(PETSC_MISSING_SIGPIPE)
378     signal(SIGPIPE, SIG_DFL);
379 #endif
380 #if !defined(PETSC_MISSING_SIGQUIT)
381     signal(SIGQUIT, SIG_DFL);
382 #endif
383 #if !defined(PETSC_MISSING_SIGSEGV)
384     signal(SIGSEGV, SIG_DFL);
385 #endif
386 #if !defined(PETSC_MISSING_SIGSYS)
387     signal(SIGSYS,  SIG_DFL);
388 #endif
389 #if !defined(PETSC_MISSING_SIGTERM)
390     signal(SIGTERM, SIG_DFL);
391 #endif
392 #if !defined(PETSC_MISSING_SIGTRAP)
393     signal(SIGTRAP, SIG_DFL);
394 #endif
395 #if !defined(PETSC_MISSING_SIGTSTP)
396     /* signal(SIGTSTP, SIG_DFL); */
397 #endif
398 #if !defined(PETSC_MISSING_SIGURG)
399     signal(SIGURG,  SIG_DFL);
400 #endif
401 #if !defined(PETSC_MISSING_SIGUSR1)
402     /* signal(SIGUSR1, SIG_DFL); */
403 #endif
404 #if !defined(PETSC_MISSING_SIGUSR2)
405     /* signal(SIGUSR2, SIG_DFL); */
406 #endif
407     SignalSet = PETSC_FALSE;
408   } else {
409     SignalSet = PETSC_TRUE;
410   }
411   PetscFunctionReturn(0);
412 }
413