1 2 /* 3 Routines to handle signals the program will receive. 4 Usually this will call the error handlers. 5 */ 6 #include <petsc/private/petscimpl.h> /*I "petscsys.h" I*/ 7 #include <signal.h> 8 #include <stdlib.h> /* for _Exit() */ 9 10 static PetscClassId SIGNAL_CLASSID = 0; 11 12 struct SH { 13 PetscClassId classid; 14 PetscErrorCode (*handler)(int, void *); 15 void *ctx; 16 struct SH *previous; 17 }; 18 static struct SH *sh = NULL; 19 static PetscBool SignalSet = PETSC_FALSE; 20 21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions. 22 See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745. 23 */ 24 static void MyExit(void) 25 { 26 _Exit(MPI_ERR_OTHER); 27 } 28 29 /* 30 PetscSignalHandler_Private - This is the signal handler called by the system. This calls 31 any signal handler set by PETSc or the application code. 32 33 Input Parameters: (depends on system) 34 . sig - integer code indicating the type of signal 35 . code - ?? 36 . sigcontext - ?? 37 . addr - ?? 38 39 */ 40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER) 41 static void PetscSignalHandler_Private(int sig, int code, struct sigcontext *scp, char *addr) 42 #else 43 static void PetscSignalHandler_Private(int sig) 44 #endif 45 { 46 PetscErrorCode ierr; 47 48 if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig, (void *)0); 49 else { 50 if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_COR, "Signal object has been corrupted"); 51 ierr = (*sh->handler)(sig, sh->ctx); 52 } 53 if (ierr) PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_COR); 54 } 55 56 /*@ 57 PetscSignalHandlerDefault - Default signal handler. 58 59 Not Collective 60 61 Input Parameters: 62 + sig - signal value 63 - ptr - unused pointer 64 65 Level: advanced 66 67 Developer Notes: 68 This does not call `PetscError()`, handles the entire error process directly 69 70 .seealso: `PetscPushSignalHandler()` 71 @*/ 72 PetscErrorCode PetscSignalHandlerDefault(int sig, void *ptr) 73 { 74 PetscErrorCode ierr; 75 const char *SIGNAME[64]; 76 77 if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi(); 78 SIGNAME[0] = "Unknown signal"; 79 #if !defined(PETSC_MISSING_SIGABRT) 80 SIGNAME[SIGABRT] = "Abort"; 81 #endif 82 #if !defined(PETSC_MISSING_SIGALRM) 83 SIGNAME[SIGALRM] = "Alarm"; 84 #endif 85 #if !defined(PETSC_MISSING_SIGBUS) 86 SIGNAME[SIGBUS] = "BUS: Bus Error, possibly illegal memory access"; 87 #endif 88 #if !defined(PETSC_MISSING_SIGCHLD) 89 SIGNAME[SIGCHLD] = "CHLD"; 90 #endif 91 #if !defined(PETSC_MISSING_SIGCONT) 92 SIGNAME[SIGCONT] = "CONT"; 93 #endif 94 #if !defined(PETSC_MISSING_SIGFPE) 95 SIGNAME[SIGFPE] = "FPE: Floating Point Exception,probably divide by zero"; 96 #endif 97 #if !defined(PETSC_MISSING_SIGHUP) 98 SIGNAME[SIGHUP] = "Hang up: Some other process (or the batch system) has told this process to end"; 99 #endif 100 #if !defined(PETSC_MISSING_SIGILL) 101 SIGNAME[SIGILL] = "Illegal instruction: Likely due to memory corruption"; 102 #endif 103 #if !defined(PETSC_MISSING_SIGINT) 104 SIGNAME[SIGINT] = "Interrupt"; 105 #endif 106 #if !defined(PETSC_MISSING_SIGKILL) 107 SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end"; 108 #endif 109 #if !defined(PETSC_MISSING_SIGPIPE) 110 SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket"; 111 #endif 112 #if !defined(PETSC_MISSING_SIGQUIT) 113 SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end"; 114 #endif 115 #if !defined(PETSC_MISSING_SIGSEGV) 116 SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range"; 117 #endif 118 #if !defined(PETSC_MISSING_SIGSYS) 119 SIGNAME[SIGSYS] = "SYS"; 120 #endif 121 #if !defined(PETSC_MISSING_SIGTERM) 122 SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end"; 123 #endif 124 #if !defined(PETSC_MISSING_SIGTRAP) 125 SIGNAME[SIGTRAP] = "TRAP"; 126 #endif 127 #if !defined(PETSC_MISSING_SIGTSTP) 128 SIGNAME[SIGTSTP] = "TSTP"; 129 #endif 130 #if !defined(PETSC_MISSING_SIGURG) 131 SIGNAME[SIGURG] = "URG"; 132 #endif 133 #if !defined(PETSC_MISSING_SIGUSR1) 134 SIGNAME[SIGUSR1] = "User 1"; 135 #endif 136 #if !defined(PETSC_MISSING_SIGUSR2) 137 SIGNAME[SIGUSR2] = "User 2"; 138 #endif 139 140 signal(sig, SIG_DFL); 141 ierr = PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */ 142 ierr = (*PetscErrorPrintf)("------------------------------------------------------------------------\n"); 143 if (sig >= 0 && sig <= 20) ierr = (*PetscErrorPrintf)("Caught signal number %d %s\n", sig, SIGNAME[sig]); 144 else ierr = (*PetscErrorPrintf)("Caught signal\n"); 145 146 ierr = (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n"); 147 ierr = (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n"); 148 #if defined(PETSC_HAVE_CUDA) 149 ierr = (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n"); 150 #endif 151 #if PetscDefined(USE_DEBUG) 152 #if !PetscDefined(HAVE_THREADSAFETY) 153 ierr = (*PetscErrorPrintf)("--------------------- Stack Frames ------------------------------------\n"); 154 ierr = PetscStackView(PETSC_STDOUT); 155 #endif 156 #else 157 ierr = (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n"); 158 ierr = (*PetscErrorPrintf)("to get more information on the crash.\n"); 159 #endif 160 #if !defined(PETSC_MISSING_SIGBUS) 161 if (sig == SIGSEGV || sig == SIGBUS) { 162 #else 163 if (sig == SIGSEGV) { 164 #endif 165 PetscBool debug; 166 167 ierr = PetscMallocGetDebug(&debug, NULL, NULL); 168 if (debug) ierr = PetscMallocValidate(__LINE__, PETSC_FUNCTION_NAME, __FILE__); 169 else ierr = (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n"); 170 } 171 atexit(MyExit); 172 (void)ierr; 173 PETSCABORT(PETSC_COMM_WORLD, PETSC_ERR_SIG); 174 return PETSC_SUCCESS; 175 } 176 177 #if !defined(PETSC_SIGNAL_CAST) 178 #define PETSC_SIGNAL_CAST 179 #endif 180 181 /*@C 182 PetscPushSignalHandler - Catches the usual fatal errors and 183 calls a user-provided routine. 184 185 Not Collective 186 187 Input Parameters: 188 + routine - routine to call when a signal is received 189 - ctx - optional context needed by the routine 190 191 Level: developer 192 193 Note: 194 There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by 195 the loader. That information is lost with the first call to `PetscPushSignalHandler()` 196 197 .seealso: [](sec_errors), `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()` 198 @*/ 199 PetscErrorCode PetscPushSignalHandler(PetscErrorCode (*routine)(int, void *), void *ctx) 200 { 201 struct SH *newsh; 202 203 PetscFunctionBegin; 204 if (!SIGNAL_CLASSID) { 205 /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */ 206 SIGNAL_CLASSID = 19; 207 } 208 if (!SignalSet && routine) { 209 /* Do not catch ABRT, CHLD, KILL */ 210 #if !defined(PETSC_MISSING_SIGALRM) 211 /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 212 #endif 213 #if !defined(PETSC_MISSING_SIGBUS) 214 signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 215 #endif 216 #if !defined(PETSC_MISSING_SIGCONT) 217 /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/ 218 #endif 219 #if !defined(PETSC_MISSING_SIGFPE) 220 signal(SIGFPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 221 #endif 222 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION) 223 { 224 struct sigaction action; 225 sigaction(SIGHUP, NULL, &action); 226 if (action.sa_handler == SIG_IGN) { 227 PetscCall(PetscInfo(NULL, "SIGHUP previously set to ignore, therefore not changing its signal handler\n")); 228 } else { 229 signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 230 } 231 } 232 #endif 233 #if !defined(PETSC_MISSING_SIGILL) 234 signal(SIGILL, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 235 #endif 236 #if !defined(PETSC_MISSING_SIGINT) 237 /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 238 #endif 239 #if !defined(PETSC_MISSING_SIGPIPE) 240 signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 241 #endif 242 #if !defined(PETSC_MISSING_SIGQUIT) 243 signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 244 #endif 245 #if !defined(PETSC_MISSING_SIGSEGV) 246 signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 247 #endif 248 #if !defined(PETSC_MISSING_SIGSYS) 249 signal(SIGSYS, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 250 #endif 251 #if !defined(PETSC_MISSING_SIGTERM) 252 #if !defined(OMPI_MAJOR_VERSION) 253 /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */ 254 signal(SIGTERM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 255 #endif 256 #endif 257 #if !defined(PETSC_MISSING_SIGTRAP) 258 signal(SIGTRAP, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 259 #endif 260 #if !defined(PETSC_MISSING_SIGTSTP) 261 /* signal(SIGTSTP, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 262 #endif 263 #if !defined(PETSC_MISSING_SIGURG) 264 signal(SIGURG, PETSC_SIGNAL_CAST PetscSignalHandler_Private); 265 #endif 266 #if !defined(PETSC_MISSING_SIGUSR1) 267 /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 268 #endif 269 #if !defined(PETSC_MISSING_SIGUSR2) 270 /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */ 271 #endif 272 SignalSet = PETSC_TRUE; 273 } 274 if (!routine) { 275 #if !defined(PETSC_MISSING_SIGALRM) 276 /* signal(SIGALRM, SIG_DFL); */ 277 #endif 278 #if !defined(PETSC_MISSING_SIGBUS) 279 signal(SIGBUS, SIG_DFL); 280 #endif 281 #if !defined(PETSC_MISSING_SIGCONT) 282 /* signal(SIGCONT, SIG_DFL); */ 283 #endif 284 #if !defined(PETSC_MISSING_SIGFPE) 285 signal(SIGFPE, SIG_DFL); 286 #endif 287 #if !defined(PETSC_MISSING_SIGHUP) 288 signal(SIGHUP, SIG_DFL); 289 #endif 290 #if !defined(PETSC_MISSING_SIGILL) 291 signal(SIGILL, SIG_DFL); 292 #endif 293 #if !defined(PETSC_MISSING_SIGINT) 294 /* signal(SIGINT, SIG_DFL); */ 295 #endif 296 #if !defined(PETSC_MISSING_SIGPIPE) 297 signal(SIGPIPE, SIG_DFL); 298 #endif 299 #if !defined(PETSC_MISSING_SIGQUIT) 300 signal(SIGQUIT, SIG_DFL); 301 #endif 302 #if !defined(PETSC_MISSING_SIGSEGV) 303 signal(SIGSEGV, SIG_DFL); 304 #endif 305 #if !defined(PETSC_MISSING_SIGSYS) 306 signal(SIGSYS, SIG_DFL); 307 #endif 308 #if !defined(PETSC_MISSING_SIGTERM) 309 signal(SIGTERM, SIG_DFL); 310 #endif 311 #if !defined(PETSC_MISSING_SIGTRAP) 312 signal(SIGTRAP, SIG_DFL); 313 #endif 314 #if !defined(PETSC_MISSING_SIGTSTP) 315 /* signal(SIGTSTP, SIG_DFL); */ 316 #endif 317 #if !defined(PETSC_MISSING_SIGURG) 318 signal(SIGURG, SIG_DFL); 319 #endif 320 #if !defined(PETSC_MISSING_SIGUSR1) 321 /* signal(SIGUSR1, SIG_DFL); */ 322 #endif 323 #if !defined(PETSC_MISSING_SIGUSR2) 324 /* signal(SIGUSR2, SIG_DFL); */ 325 #endif 326 SignalSet = PETSC_FALSE; 327 } 328 PetscCall(PetscNew(&newsh)); 329 if (sh) { 330 PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted"); 331 newsh->previous = sh; 332 } else newsh->previous = NULL; 333 newsh->handler = routine; 334 newsh->ctx = ctx; 335 newsh->classid = SIGNAL_CLASSID; 336 sh = newsh; 337 PetscFunctionReturn(PETSC_SUCCESS); 338 } 339 340 /*@ 341 PetscPopSignalHandler - Removes the last signal handler that was pushed. 342 If no signal handlers are left on the stack it will remove the PETSc signal handler. 343 (That is PETSc will no longer catch signals). 344 345 Not Collective 346 347 Level: developer 348 349 Note: 350 There is no way to return to a signal handler that was set directly by the user with the UNIX signal handler API or by 351 the loader. That information is lost with the first call to `PetscPushSignalHandler()` 352 353 .seealso: [](sec_errors), `PetscPushSignalHandler()` 354 @*/ 355 PetscErrorCode PetscPopSignalHandler(void) 356 { 357 struct SH *tmp; 358 359 PetscFunctionBegin; 360 if (!sh) PetscFunctionReturn(PETSC_SUCCESS); 361 PetscCheck(sh->classid == SIGNAL_CLASSID, PETSC_COMM_SELF, PETSC_ERR_COR, "Signal object has been corrupted"); 362 363 tmp = sh; 364 sh = sh->previous; 365 PetscCall(PetscFree(tmp)); 366 if (!sh || !sh->handler) { 367 #if !defined(PETSC_MISSING_SIGALRM) 368 /* signal(SIGALRM, SIG_DFL); */ 369 #endif 370 #if !defined(PETSC_MISSING_SIGBUS) 371 signal(SIGBUS, SIG_DFL); 372 #endif 373 #if !defined(PETSC_MISSING_SIGCONT) 374 /* signal(SIGCONT, SIG_DFL); */ 375 #endif 376 #if !defined(PETSC_MISSING_SIGFPE) 377 signal(SIGFPE, SIG_DFL); 378 #endif 379 #if !defined(PETSC_MISSING_SIGHUP) 380 signal(SIGHUP, SIG_DFL); 381 #endif 382 #if !defined(PETSC_MISSING_SIGILL) 383 signal(SIGILL, SIG_DFL); 384 #endif 385 #if !defined(PETSC_MISSING_SIGINT) 386 /* signal(SIGINT, SIG_DFL); */ 387 #endif 388 #if !defined(PETSC_MISSING_SIGPIPE) 389 signal(SIGPIPE, SIG_DFL); 390 #endif 391 #if !defined(PETSC_MISSING_SIGQUIT) 392 signal(SIGQUIT, SIG_DFL); 393 #endif 394 #if !defined(PETSC_MISSING_SIGSEGV) 395 signal(SIGSEGV, SIG_DFL); 396 #endif 397 #if !defined(PETSC_MISSING_SIGSYS) 398 signal(SIGSYS, SIG_DFL); 399 #endif 400 #if !defined(PETSC_MISSING_SIGTERM) 401 signal(SIGTERM, SIG_DFL); 402 #endif 403 #if !defined(PETSC_MISSING_SIGTRAP) 404 signal(SIGTRAP, SIG_DFL); 405 #endif 406 #if !defined(PETSC_MISSING_SIGTSTP) 407 /* signal(SIGTSTP, SIG_DFL); */ 408 #endif 409 #if !defined(PETSC_MISSING_SIGURG) 410 signal(SIGURG, SIG_DFL); 411 #endif 412 #if !defined(PETSC_MISSING_SIGUSR1) 413 /* signal(SIGUSR1, SIG_DFL); */ 414 #endif 415 #if !defined(PETSC_MISSING_SIGUSR2) 416 /* signal(SIGUSR2, SIG_DFL); */ 417 #endif 418 SignalSet = PETSC_FALSE; 419 } else { 420 SignalSet = PETSC_TRUE; 421 } 422 PetscFunctionReturn(PETSC_SUCCESS); 423 } 424