1 /* 2 Routines to handle signals the program will receive. 3 Usually this will call the error handlers. 4 */ 5 #include <petsc/private/petscimpl.h> /*I "petscsys.h" I*/ 6 #include <signal.h> 7 #include <stdlib.h> /* for _Exit() */ 8 9 static PetscClassId SIGNAL_CLASSID = 0; 10 11 struct SH { 12 PetscClassId classid; 13 PetscErrorCode (*handler)(int, void *); 14 void *ctx; 15 struct SH *previous; 16 }; 17 static struct SH *sh = NULL; 18 static PetscBool SignalSet = PETSC_FALSE; 19 20 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions. 21 See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745. 22 */ 23 static void MyExit(void) 24 { 25 _Exit(MPI_ERR_OTHER); 26 } 27 28 /* 29 PetscSignalHandler_Private - This is the signal handler called by the system. This calls 30 any signal handler set by PETSc or the application code. 31 32 Input Parameters: (depends on system) 33 . sig - integer code indicating the type of signal 34 . code - ?? 35 . sigcontext - ?? 36 . addr - ?? 37 38 */ 39 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER) 40 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr) 41 #else 42 static void PetscSignalHandler_Private(int sig) 43 #endif 44 { 45 PetscErrorCode ierr; 46 47 if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0); 48 else { 49 if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted"); 50 ierr = (*sh->handler)(sig, sh->ctx); 51 } 52 if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR); 53 } 54 55 /*@ 56 PetscSignalHandlerDefault - Default signal handler. 57 58 Not Collective 59 60 Input Parameters: 61 + sig - signal value 62 - ptr - unused pointer 63 64 Level: advanced 65 66 Developer Note: 67 This does not call `PetscError()`, it handles the entire error process, including possibly printing the traceback, directly 68 69 .seealso: [](sec_errors), `PetscPushSignalHandler()` 70 @*/ 71 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr) 72 { 73 PetscErrorCode ierr; 74 const char *SIGNAME[64]; 75 76 if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi(); 77 SIGNAME[0] = "Unknown signal"; 78 #if !defined(PETSC_MISSING_SIGABRT) 79 SIGNAME[SIGABRT] = "Abort"; 80 #endif 81 #if !defined(PETSC_MISSING_SIGALRM) 82 SIGNAME[SIGALRM] = "Alarm"; 83 #endif 84 #if !defined(PETSC_MISSING_SIGBUS) 85 SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access"; 86 #endif 87 #if !defined(PETSC_MISSING_SIGCHLD) 88 SIGNAME[SIGCHLD] = "CHLD"; 89 #endif 90 #if !defined(PETSC_MISSING_SIGCONT) 91 SIGNAME[SIGCONT] = "CONT"; 92 #endif 93 #if !defined(PETSC_MISSING_SIGFPE) 94 SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero"; 95 #endif 96 #if !defined(PETSC_MISSING_SIGHUP) 97 SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end"; 98 #endif 99 #if !defined(PETSC_MISSING_SIGILL) 100 SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption"; 101 #endif 102 #if !defined(PETSC_MISSING_SIGINT) 103 SIGNAME[SIGINT] = "Interrupt"; 104 #endif 105 #if !defined(PETSC_MISSING_SIGKILL) 106 SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end"; 107 #endif 108 #if !defined(PETSC_MISSING_SIGPIPE) 109 SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket"; 110 #endif 111 #if !defined(PETSC_MISSING_SIGQUIT) 112 SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end"; 113 #endif 114 #if !defined(PETSC_MISSING_SIGSEGV) 115 SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range"; 116 #endif 117 #if !defined(PETSC_MISSING_SIGSYS) 118 SIGNAME[SIGSYS] = "SYS"; 119 #endif 120 #if !defined(PETSC_MISSING_SIGTERM) 121 SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end"; 122 #endif 123 #if !defined(PETSC_MISSING_SIGTRAP) 124 SIGNAME[SIGTRAP] = "TRAP"; 125 #endif 126 #if !defined(PETSC_MISSING_SIGTSTP) 127 SIGNAME[SIGTSTP] = "TSTP"; 128 #endif 129 #if !defined(PETSC_MISSING_SIGURG) 130 SIGNAME[SIGURG] = "URG"; 131 #endif 132 #if !defined(PETSC_MISSING_SIGUSR1) 133 SIGNAME[SIGUSR1] = "User 1"; 134 #endif 135 #if !defined(PETSC_MISSING_SIGUSR2) 136 SIGNAME[SIGUSR2] = "User 2"; 137 #endif 138 139 signal(sig, SIG_DFL); 140 ierr = PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */ 141 ierr = (*PetscErrorPrintf)("------------------------------------------------------------------------\n"); 142 if (sig >= 0 && sig <= 20) ierr = (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]); 143 else ierr = (*PetscErrorPrintf)("Caught signal\n"); 144 145 ierr = (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n"); 146 ierr = (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n"); 147 #if defined(PETSC_HAVE_CUDA) 148 ierr = (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n"); 149 #endif 150 #if PetscDefined(USE_DEBUG) 151 #if !PetscDefined(HAVE_THREADSAFETY) 152 ierr = (*PetscErrorPrintf)("--------------------- Stack Frames ------------------------------------\n"); 153 ierr = PetscStackView(PETSC_STDOUT); 154 #endif 155 #else 156 ierr = (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n"); 157 ierr = (*PetscErrorPrintf)("to get more information on the crash.\n"); 158 #endif 159 #if !defined(PETSC_MISSING_SIGBUS) 160 if (sig == SIGSEGV || sig == SIGBUS) { 161 #else 162 if (sig == SIGSEGV) { 163 #endif 164 PetscBool debug; 165 166 ierr = PetscMallocGetDebug(&debug, NULL, NULL); 167 if (debug) ierr = PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__); 168 else ierr = (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n"); 169 } 170 atexit(MyExit); 171 (void)ierr; 172 PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_SIG); 173 return PETSC_SUCCESS; 174 } 175 176 #if !defined(PETSC_SIGNAL_CAST) 177 #define PETSC_SIGNAL_CAST 178 #endif 179 180 /*@C 181 PetscPushSignalHandler - Catches the usual fatal errors and 182 calls a user-provided routine. 183 184 Not Collective 185 186 Input Parameters: 187 + routine - routine to call when a signal is received 188 - ctx - optional context needed by the routine 189 190 Level: developer 191 192 Note: 193 There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by 194 the loader. That information is lost with the first call to `PetscPushSignalHandler()` 195 196 .seealso: [](sec_errors), `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()` 197 @*/ 198 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx) 199 { 200 struct SH *newsh; 201 202 PetscFunctionBegin; 203 if (!SIGNAL_CLASSID) { 204 /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */ 205 SIGNAL_CLASSID = 19; 206 } 207 if (!SignalSet && routine) { 208 /* Do not catch ABRT, CHLD, KILL */ 209 #if !defined(PETSC_MISSING_SIGALRM) 210 /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 211 #endif 212 #if !defined(PETSC_MISSING_SIGBUS) 213 signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 214 #endif 215 #if !defined(PETSC_MISSING_SIGCONT) 216 /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/ 217 #endif 218 #if !defined(PETSC_MISSING_SIGFPE) 219 signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 220 #endif 221 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION) 222 { 223 struct sigaction action; 224 sigaction(SIGHUP, NULL, &action); 225 if (action.sa_handler == SIG_IGN) { 226 PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefore not changing its signal handler\n")); 227 } else { 228 signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 229 } 230 } 231 #endif 232 #if !defined(PETSC_MISSING_SIGILL) 233 signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 234 #endif 235 #if !defined(PETSC_MISSING_SIGINT) 236 /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 237 #endif 238 #if !defined(PETSC_MISSING_SIGPIPE) 239 signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 240 #endif 241 #if !defined(PETSC_MISSING_SIGQUIT) 242 signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 243 #endif 244 #if !defined(PETSC_MISSING_SIGSEGV) 245 signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 246 #endif 247 #if !defined(PETSC_MISSING_SIGSYS) 248 signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 249 #endif 250 #if !defined(PETSC_MISSING_SIGTERM) 251 #if !defined(OMPI_MAJOR_VERSION) 252 /* Open MPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */ 253 signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 254 #endif 255 #endif 256 #if !defined(PETSC_MISSING_SIGTRAP) 257 signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 258 #endif 259 #if !defined(PETSC_MISSING_SIGTSTP) 260 /* signal(SIGTSTP, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 261 #endif 262 #if !defined(PETSC_MISSING_SIGURG) 263 signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 264 #endif 265 #if !defined(PETSC_MISSING_SIGUSR1) 266 /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 267 #endif 268 #if !defined(PETSC_MISSING_SIGUSR2) 269 /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 270 #endif 271 SignalSet = PETSC_TRUE; 272 } 273 if (!routine) { 274 #if !defined(PETSC_MISSING_SIGALRM) 275 /* signal(SIGALRM, SIG_DFL); */ 276 #endif 277 #if !defined(PETSC_MISSING_SIGBUS) 278 signal(SIGBUS, SIG_DFL); 279 #endif 280 #if !defined(PETSC_MISSING_SIGCONT) 281 /* signal(SIGCONT, SIG_DFL); */ 282 #endif 283 #if !defined(PETSC_MISSING_SIGFPE) 284 signal(SIGFPE, SIG_DFL); 285 #endif 286 #if !defined(PETSC_MISSING_SIGHUP) 287 signal(SIGHUP, SIG_DFL); 288 #endif 289 #if !defined(PETSC_MISSING_SIGILL) 290 signal(SIGILL, SIG_DFL); 291 #endif 292 #if !defined(PETSC_MISSING_SIGINT) 293 /* signal(SIGINT, SIG_DFL); */ 294 #endif 295 #if !defined(PETSC_MISSING_SIGPIPE) 296 signal(SIGPIPE, SIG_DFL); 297 #endif 298 #if !defined(PETSC_MISSING_SIGQUIT) 299 signal(SIGQUIT, SIG_DFL); 300 #endif 301 #if !defined(PETSC_MISSING_SIGSEGV) 302 signal(SIGSEGV, SIG_DFL); 303 #endif 304 #if !defined(PETSC_MISSING_SIGSYS) 305 signal(SIGSYS, SIG_DFL); 306 #endif 307 #if !defined(PETSC_MISSING_SIGTERM) 308 signal(SIGTERM, SIG_DFL); 309 #endif 310 #if !defined(PETSC_MISSING_SIGTRAP) 311 signal(SIGTRAP, SIG_DFL); 312 #endif 313 #if !defined(PETSC_MISSING_SIGTSTP) 314 /* signal(SIGTSTP, SIG_DFL); */ 315 #endif 316 #if !defined(PETSC_MISSING_SIGURG) 317 signal(SIGURG, SIG_DFL); 318 #endif 319 #if !defined(PETSC_MISSING_SIGUSR1) 320 /* signal(SIGUSR1, SIG_DFL); */ 321 #endif 322 #if !defined(PETSC_MISSING_SIGUSR2) 323 /* signal(SIGUSR2, SIG_DFL); */ 324 #endif 325 SignalSet = PETSC_FALSE; 326 } 327 PetscCall(PetscNew(&newsh)); 328 if (sh) { 329 PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted"); 330 newsh->previous = sh; 331 } else newsh->previous = NULL; 332 newsh->handler = routine; 333 newsh->ctx = ctx; 334 newsh->classid = SIGNAL_CLASSID; 335 sh = newsh; 336 PetscFunctionReturn(PETSC_SUCCESS); 337 } 338 339 /*@ 340 PetscPopSignalHandler - Removes the last signal handler that was pushed. 341 If no signal handlers are left on the stack it will remove the PETSc signal handler. 342 (That is PETSc will no longer catch signals). 343 344 Not Collective 345 346 Level: developer 347 348 Note: 349 There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by 350 the loader. That information is lost with the first call to `PetscPushSignalHandler()` 351 352 .seealso: [](sec_errors), `PetscPushSignalHandler()` 353 @*/ 354 PetscErrorCode PetscPopSignalHandler(void) 355 { 356 struct SH *tmp; 357 358 PetscFunctionBegin; 359 if (!sh) PetscFunctionReturn(PETSC_SUCCESS); 360 PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted"); 361 362 tmp = sh; 363 sh = sh->previous; 364 PetscCall(PetscFree(tmp)); 365 if (!sh || !sh->handler) { 366 #if !defined(PETSC_MISSING_SIGALRM) 367 /* signal(SIGALRM, SIG_DFL); */ 368 #endif 369 #if !defined(PETSC_MISSING_SIGBUS) 370 signal(SIGBUS, SIG_DFL); 371 #endif 372 #if !defined(PETSC_MISSING_SIGCONT) 373 /* signal(SIGCONT, SIG_DFL); */ 374 #endif 375 #if !defined(PETSC_MISSING_SIGFPE) 376 signal(SIGFPE, SIG_DFL); 377 #endif 378 #if !defined(PETSC_MISSING_SIGHUP) 379 signal(SIGHUP, SIG_DFL); 380 #endif 381 #if !defined(PETSC_MISSING_SIGILL) 382 signal(SIGILL, SIG_DFL); 383 #endif 384 #if !defined(PETSC_MISSING_SIGINT) 385 /* signal(SIGINT, SIG_DFL); */ 386 #endif 387 #if !defined(PETSC_MISSING_SIGPIPE) 388 signal(SIGPIPE, SIG_DFL); 389 #endif 390 #if !defined(PETSC_MISSING_SIGQUIT) 391 signal(SIGQUIT, SIG_DFL); 392 #endif 393 #if !defined(PETSC_MISSING_SIGSEGV) 394 signal(SIGSEGV, SIG_DFL); 395 #endif 396 #if !defined(PETSC_MISSING_SIGSYS) 397 signal(SIGSYS, SIG_DFL); 398 #endif 399 #if !defined(PETSC_MISSING_SIGTERM) 400 signal(SIGTERM, SIG_DFL); 401 #endif 402 #if !defined(PETSC_MISSING_SIGTRAP) 403 signal(SIGTRAP, SIG_DFL); 404 #endif 405 #if !defined(PETSC_MISSING_SIGTSTP) 406 /* signal(SIGTSTP, SIG_DFL); */ 407 #endif 408 #if !defined(PETSC_MISSING_SIGURG) 409 signal(SIGURG, SIG_DFL); 410 #endif 411 #if !defined(PETSC_MISSING_SIGUSR1) 412 /* signal(SIGUSR1, SIG_DFL); */ 413 #endif 414 #if !defined(PETSC_MISSING_SIGUSR2) 415 /* signal(SIGUSR2, SIG_DFL); */ 416 #endif 417 SignalSet = PETSC_FALSE; 418 } else { 419 SignalSet = PETSC_TRUE; 420 } 421 PetscFunctionReturn(PETSC_SUCCESS); 422 } 423