1 /*
2 Routines to handle signals the program will receive.
3 Usually this will call the error handlers.
4 */
5 #include <petsc/private/petscimpl.h> /*I "petscsys.h" I*/
6 #include <signal.h>
7 #include <stdlib.h> /* for _Exit() */
8
9 static PetscClassId SIGNAL_CLASSID = 0;
10
11 struct SH {
12 PetscClassId classid;
13 PetscErrorCode (*handler)(int, void *);
14 void *ctx;
15 struct SH *previous;
16 };
17 static struct SH *sh = NULL;
18 static PetscBool SignalSet = PETSC_FALSE;
19
20 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
21 See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
22 */
MyExit(void)23 static void MyExit(void)
24 {
25 _Exit(MPI_ERR_OTHER);
26 }
27
28 /*
29 PetscSignalHandler_Private - This is the signal handler called by the system. This calls
30 any signal handler set by PETSc or the application code.
31
32 Input Parameters: (depends on system)
33 . sig - integer code indicating the type of signal
34 . code - ??
35 . sigcontext - ??
36 . addr - ??
37
38 */
39 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
PetscSignalHandler_Private(int sig,int code,struct sigcontext * scp,char * addr)40 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr)
41 #else
42 static void PetscSignalHandler_Private(int sig)
43 #endif
44 {
45 PetscErrorCode ierr;
46
47 if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, NULL);
48 else {
49 if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted");
50 ierr = (*sh->handler)(sig, sh->ctx);
51 }
52 if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR);
53 }
54
55 /*@
56 PetscSignalHandlerDefault - Default signal handler.
57
58 Not Collective
59
60 Input Parameters:
61 + sig - signal value
62 - ptr - unused pointer
63
64 Level: advanced
65
66 Developer Note:
67 This does not call `PetscError()`, it handles the entire error process, including possibly printing the traceback, directly
68
69 .seealso: [](sec_errors), `PetscPushSignalHandler()`
70 @*/
PetscSignalHandlerDefault(int sig,void * ptr)71 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr)
72 {
73 const char *SIGNAME[64];
74
75 if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
76 SIGNAME[0] = "Unknown signal";
77 #if !defined(PETSC_MISSING_SIGABRT)
78 SIGNAME[SIGABRT] = "Abort";
79 #endif
80 #if !defined(PETSC_MISSING_SIGALRM)
81 SIGNAME[SIGALRM] = "Alarm";
82 #endif
83 #if !defined(PETSC_MISSING_SIGBUS)
84 SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access";
85 #endif
86 #if !defined(PETSC_MISSING_SIGCHLD)
87 SIGNAME[SIGCHLD] = "CHLD";
88 #endif
89 #if !defined(PETSC_MISSING_SIGCONT)
90 SIGNAME[SIGCONT] = "CONT";
91 #endif
92 #if !defined(PETSC_MISSING_SIGFPE)
93 SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero";
94 #endif
95 #if !defined(PETSC_MISSING_SIGHUP)
96 SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end";
97 #endif
98 #if !defined(PETSC_MISSING_SIGILL)
99 SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption";
100 #endif
101 #if !defined(PETSC_MISSING_SIGINT)
102 SIGNAME[SIGINT] = "Interrupt";
103 #endif
104 #if !defined(PETSC_MISSING_SIGKILL)
105 SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
106 #endif
107 #if !defined(PETSC_MISSING_SIGPIPE)
108 SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
109 #endif
110 #if !defined(PETSC_MISSING_SIGQUIT)
111 SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
112 #endif
113 #if !defined(PETSC_MISSING_SIGSEGV)
114 SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
115 #endif
116 #if !defined(PETSC_MISSING_SIGSYS)
117 SIGNAME[SIGSYS] = "SYS";
118 #endif
119 #if !defined(PETSC_MISSING_SIGTERM)
120 SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
121 #endif
122 #if !defined(PETSC_MISSING_SIGTRAP)
123 SIGNAME[SIGTRAP] = "TRAP";
124 #endif
125 #if !defined(PETSC_MISSING_SIGTSTP)
126 SIGNAME[SIGTSTP] = "TSTP";
127 #endif
128 #if !defined(PETSC_MISSING_SIGURG)
129 SIGNAME[SIGURG] = "URG";
130 #endif
131 #if !defined(PETSC_MISSING_SIGUSR1)
132 SIGNAME[SIGUSR1] = "User 1";
133 #endif
134 #if !defined(PETSC_MISSING_SIGUSR2)
135 SIGNAME[SIGUSR2] = "User 2";
136 #endif
137
138 signal(sig, SIG_DFL);
139 (void)PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
140 (void)(*PetscErrorPrintf)("------------------------------------------------------------------------\n");
141 if (sig >= 0 && sig <= 20) (void)(*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]);
142 else (void)(*PetscErrorPrintf)("Caught signal\n");
143
144 (void)(*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
145 (void)(*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
146 #if defined(PETSC_HAVE_CUDA)
147 (void)(*PetscErrorPrintf)("or try https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
148 #endif
149 #if PetscDefined(USE_DEBUG)
150 #if !PetscDefined(HAVE_THREADSAFETY)
151 (void)(*PetscErrorPrintf)("--------------------- Stack Frames ------------------------------------\n");
152 (void)PetscStackView(PETSC_STDOUT);
153 #endif
154 #else
155 (void)(*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
156 (void)(*PetscErrorPrintf)("to get more information on the crash.\n");
157 #endif
158 #if !defined(PETSC_MISSING_SIGBUS)
159 if (sig == SIGSEGV || sig == SIGBUS) {
160 #else
161 if (sig == SIGSEGV) {
162 #endif
163 PetscBool debug;
164
165 (void)PetscMallocGetDebug(&debug, NULL, NULL);
166 if (debug) (void)PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__);
167 else (void)(*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
168 }
169 atexit(MyExit);
170 PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_SIG);
171 return PETSC_SUCCESS;
172 }
173
/* PETSC_SIGNAL_CAST is written in front of the handler argument of the signal() calls in this file;
   it expands to nothing unless it has already been defined */
#ifndef PETSC_SIGNAL_CAST
  #define PETSC_SIGNAL_CAST
#endif
177
178 /*@C
179 PetscPushSignalHandler - Catches the usual fatal errors and
180 calls a user-provided routine.
181
182 Not Collective, No Fortran Support
183
184 Input Parameters:
185 + routine - routine to call when a signal is received
186 - ctx - optional context needed by the routine
187
188 Level: developer
189
190 Note:
191 There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by
192 the loader. That information is lost with the first call to `PetscPushSignalHandler()`
193
194 .seealso: [](sec_errors), `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
195 @*/
196 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), PetscCtx ctx)
197 {
198 struct SH *newsh;
199
200 PetscFunctionBegin;
201 if (!SIGNAL_CLASSID) {
202 /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
203 SIGNAL_CLASSID = 19;
204 }
205 if (!SignalSet && routine) {
206 /* Do not catch ABRT, CHLD, KILL */
207 #if !defined(PETSC_MISSING_SIGALRM)
208 /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
209 #endif
210 #if !defined(PETSC_MISSING_SIGBUS)
211 signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
212 #endif
213 #if !defined(PETSC_MISSING_SIGCONT)
214 /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
215 #endif
216 #if !defined(PETSC_MISSING_SIGFPE)
217 signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
218 #endif
219 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
220 {
221 struct sigaction action;
222 sigaction(SIGHUP, NULL, &action);
223 if (action.sa_handler == SIG_IGN) {
224 PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefore not changing its signal handler\n"));
225 } else {
226 signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
227 }
228 }
229 #endif
230 #if !defined(PETSC_MISSING_SIGILL)
231 signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
232 #endif
233 #if !defined(PETSC_MISSING_SIGINT)
234 /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
235 #endif
236 #if !defined(PETSC_MISSING_SIGPIPE)
237 signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
238 #endif
239 #if !defined(PETSC_MISSING_SIGQUIT)
240 signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
241 #endif
242 #if !defined(PETSC_MISSING_SIGSEGV)
243 signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
244 #endif
245 #if !defined(PETSC_MISSING_SIGSYS)
246 signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
247 #endif
248 #if !defined(PETSC_MISSING_SIGTERM)
249 #if !defined(PETSC_HAVE_OPENMPI)
250 /* Open MPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
251 signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
252 #endif
253 #endif
254 #if !defined(PETSC_MISSING_SIGTRAP)
255 signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
256 #endif
257 #if !defined(PETSC_MISSING_SIGTSTP)
258 /* signal(SIGTSTP, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
259 #endif
260 #if !defined(PETSC_MISSING_SIGURG)
261 signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
262 #endif
263 #if !defined(PETSC_MISSING_SIGUSR1)
264 /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
265 #endif
266 #if !defined(PETSC_MISSING_SIGUSR2)
267 /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
268 #endif
269 SignalSet = PETSC_TRUE;
270 }
271 if (!routine) {
272 #if !defined(PETSC_MISSING_SIGALRM)
273 /* signal(SIGALRM, SIG_DFL); */
274 #endif
275 #if !defined(PETSC_MISSING_SIGBUS)
276 signal(SIGBUS, SIG_DFL);
277 #endif
278 #if !defined(PETSC_MISSING_SIGCONT)
279 /* signal(SIGCONT, SIG_DFL); */
280 #endif
281 #if !defined(PETSC_MISSING_SIGFPE)
282 signal(SIGFPE, SIG_DFL);
283 #endif
284 #if !defined(PETSC_MISSING_SIGHUP)
285 signal(SIGHUP, SIG_DFL);
286 #endif
287 #if !defined(PETSC_MISSING_SIGILL)
288 signal(SIGILL, SIG_DFL);
289 #endif
290 #if !defined(PETSC_MISSING_SIGINT)
291 /* signal(SIGINT, SIG_DFL); */
292 #endif
293 #if !defined(PETSC_MISSING_SIGPIPE)
294 signal(SIGPIPE, SIG_DFL);
295 #endif
296 #if !defined(PETSC_MISSING_SIGQUIT)
297 signal(SIGQUIT, SIG_DFL);
298 #endif
299 #if !defined(PETSC_MISSING_SIGSEGV)
300 signal(SIGSEGV, SIG_DFL);
301 #endif
302 #if !defined(PETSC_MISSING_SIGSYS)
303 signal(SIGSYS, SIG_DFL);
304 #endif
305 #if !defined(PETSC_MISSING_SIGTERM)
306 signal(SIGTERM, SIG_DFL);
307 #endif
308 #if !defined(PETSC_MISSING_SIGTRAP)
309 signal(SIGTRAP, SIG_DFL);
310 #endif
311 #if !defined(PETSC_MISSING_SIGTSTP)
312 /* signal(SIGTSTP, SIG_DFL); */
313 #endif
314 #if !defined(PETSC_MISSING_SIGURG)
315 signal(SIGURG, SIG_DFL);
316 #endif
317 #if !defined(PETSC_MISSING_SIGUSR1)
318 /* signal(SIGUSR1, SIG_DFL); */
319 #endif
320 #if !defined(PETSC_MISSING_SIGUSR2)
321 /* signal(SIGUSR2, SIG_DFL); */
322 #endif
323 SignalSet = PETSC_FALSE;
324 }
325 PetscCall(PetscNew(&newsh));
326 if (sh) {
327 PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
328 newsh->previous = sh;
329 } else newsh->previous = NULL;
330 newsh->handler = routine;
331 newsh->ctx = ctx;
332 newsh->classid = SIGNAL_CLASSID;
333 sh = newsh;
334 PetscFunctionReturn(PETSC_SUCCESS);
335 }
336
337 /*@
338 PetscPopSignalHandler - Removes the last signal handler that was pushed.
339 If no signal handlers are left on the stack it will remove the PETSc signal handler.
340 (That is PETSc will no longer catch signals).
341
342 Not Collective
343
344 Level: developer
345
346 Note:
347 There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by
348 the loader. That information is lost with the first call to `PetscPushSignalHandler()`
349
350 .seealso: [](sec_errors), `PetscPushSignalHandler()`
351 @*/
352 PetscErrorCode PetscPopSignalHandler(void)
353 {
354 struct SH *tmp;
355
356 PetscFunctionBegin;
357 if (!sh) PetscFunctionReturn(PETSC_SUCCESS);
358 PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted");
359
360 tmp = sh;
361 sh = sh->previous;
362 PetscCall(PetscFree(tmp));
363 if (!sh || !sh->handler) {
364 #if !defined(PETSC_MISSING_SIGALRM)
365 /* signal(SIGALRM, SIG_DFL); */
366 #endif
367 #if !defined(PETSC_MISSING_SIGBUS)
368 signal(SIGBUS, SIG_DFL);
369 #endif
370 #if !defined(PETSC_MISSING_SIGCONT)
371 /* signal(SIGCONT, SIG_DFL); */
372 #endif
373 #if !defined(PETSC_MISSING_SIGFPE)
374 signal(SIGFPE, SIG_DFL);
375 #endif
376 #if !defined(PETSC_MISSING_SIGHUP)
377 signal(SIGHUP, SIG_DFL);
378 #endif
379 #if !defined(PETSC_MISSING_SIGILL)
380 signal(SIGILL, SIG_DFL);
381 #endif
382 #if !defined(PETSC_MISSING_SIGINT)
383 /* signal(SIGINT, SIG_DFL); */
384 #endif
385 #if !defined(PETSC_MISSING_SIGPIPE)
386 signal(SIGPIPE, SIG_DFL);
387 #endif
388 #if !defined(PETSC_MISSING_SIGQUIT)
389 signal(SIGQUIT, SIG_DFL);
390 #endif
391 #if !defined(PETSC_MISSING_SIGSEGV)
392 signal(SIGSEGV, SIG_DFL);
393 #endif
394 #if !defined(PETSC_MISSING_SIGSYS)
395 signal(SIGSYS, SIG_DFL);
396 #endif
397 #if !defined(PETSC_MISSING_SIGTERM)
398 signal(SIGTERM, SIG_DFL);
399 #endif
400 #if !defined(PETSC_MISSING_SIGTRAP)
401 signal(SIGTRAP, SIG_DFL);
402 #endif
403 #if !defined(PETSC_MISSING_SIGTSTP)
404 /* signal(SIGTSTP, SIG_DFL); */
405 #endif
406 #if !defined(PETSC_MISSING_SIGURG)
407 signal(SIGURG, SIG_DFL);
408 #endif
409 #if !defined(PETSC_MISSING_SIGUSR1)
410 /* signal(SIGUSR1, SIG_DFL); */
411 #endif
412 #if !defined(PETSC_MISSING_SIGUSR2)
413 /* signal(SIGUSR2, SIG_DFL); */
414 #endif
415 SignalSet = PETSC_FALSE;
416 } else {
417 SignalSet = PETSC_TRUE;
418 }
419 PetscFunctionReturn(PETSC_SUCCESS);
420 }
421