xref: /petsc/src/sys/error/errtrace.c (revision 03047865b8d8757cf1cf9cda45785c1537b01dc1)
10039db0dSBarry Smith #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for fileno() */
2c6db04a5SJed Brown #include <petscsys.h>                    /*I "petscsys.h" I*/
3f67a399dSBarry Smith #include <petsc/private/petscimpl.h>
4c6db04a5SJed Brown #include <petscconfiginfo.h>
5114011d0SBarry Smith #if defined(PETSC_HAVE_UNISTD_H)
6114011d0SBarry Smith   #include <unistd.h>
7114011d0SBarry Smith #endif
89beb8f72SToby Isaac #include "err.h"
935f00c14SToby Isaac #include <petsc/private/logimpl.h> // PETSC_TLS
10e5c89e4eSSatish Balay 
11456dbbcdSJunchao Zhang #if defined(PETSC_HAVE_CUPM)
12456dbbcdSJunchao Zhang   #include <petsc/private/deviceimpl.h>
13456dbbcdSJunchao Zhang #endif
14456dbbcdSJunchao Zhang 
15e5c89e4eSSatish Balay /*@C
16aaa8cc7dSPierre Jolivet   PetscIgnoreErrorHandler - Deprecated, use `PetscReturnErrorHandler()`. Ignores the error, allows program to continue as if error did not occur
17e5c89e4eSSatish Balay 
18cc4c1da9SBarry Smith   Not Collective, No Fortran Support
19e5c89e4eSSatish Balay 
20e5c89e4eSSatish Balay   Input Parameters:
21e32f2f54SBarry Smith + comm - communicator over which error occurred
22e32f2f54SBarry Smith . line - the line number of the error (indicated by __LINE__)
2310450e9eSJacob Faibussowitsch . fun  - the function name
24e5c89e4eSSatish Balay . file - the file in which the error was detected (indicated by __FILE__)
25e5c89e4eSSatish Balay . mess - an error text string, usually just printed to the screen
26e5c89e4eSSatish Balay . n    - the generic error number
27e5c89e4eSSatish Balay . p    - specific error number
28e5c89e4eSSatish Balay - ctx  - error handler context
29e5c89e4eSSatish Balay 
30e5c89e4eSSatish Balay   Level: developer
31e5c89e4eSSatish Balay 
32811af0c4SBarry Smith   Note:
33811af0c4SBarry Smith   Users do not directly call this routine
34e5c89e4eSSatish Balay 
35db781477SPatrick Sanan .seealso: `PetscReturnErrorHandler()`
36e5c89e4eSSatish Balay  @*/
PetscIgnoreErrorHandler(MPI_Comm comm,int line,const char * fun,const char * file,PetscErrorCode n,PetscErrorType p,const char * mess,PetscCtx ctx)37*2a8381b2SBarry Smith PetscErrorCode PetscIgnoreErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, PetscCtx ctx)
38d71ae5a4SJacob Faibussowitsch {
3910450e9eSJacob Faibussowitsch   (void)comm;
4010450e9eSJacob Faibussowitsch   (void)line;
4110450e9eSJacob Faibussowitsch   (void)fun;
4210450e9eSJacob Faibussowitsch   (void)file;
4310450e9eSJacob Faibussowitsch   (void)p;
4410450e9eSJacob Faibussowitsch   (void)mess;
4510450e9eSJacob Faibussowitsch   (void)ctx;
4611cc89d2SBarry Smith   return n;
47e5c89e4eSSatish Balay }
48e5c89e4eSSatish Balay 
495abee1b0SJed Brown static char      arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
50ace3abfcSBarry Smith static PetscBool PetscErrorPrintfInitializeCalled = PETSC_FALSE;
513f6e4ae9SSatish Balay static char      version[256];
52107894f0SSatish Balay 
53107894f0SSatish Balay /*
54107894f0SSatish Balay    Initializes arch, hostname, username, date so that system calls do NOT need
55107894f0SSatish Balay    to be made during the error handler.
56107894f0SSatish Balay */
PetscErrorPrintfInitialize(void)57d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscErrorPrintfInitialize(void)
58d71ae5a4SJacob Faibussowitsch {
59ace3abfcSBarry Smith   PetscBool use_stdout = PETSC_FALSE, use_none = PETSC_FALSE;
60107894f0SSatish Balay 
61107894f0SSatish Balay   PetscFunctionBegin;
629566063dSJacob Faibussowitsch   PetscCall(PetscGetArchType(arch, sizeof(arch)));
639566063dSJacob Faibussowitsch   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
649566063dSJacob Faibussowitsch   PetscCall(PetscGetUserName(username, sizeof(username)));
659566063dSJacob Faibussowitsch   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
669566063dSJacob Faibussowitsch   PetscCall(PetscGetDate(date, sizeof(date)));
679566063dSJacob Faibussowitsch   PetscCall(PetscGetVersion(version, sizeof(version)));
68e8fb0fc0SBarry Smith 
699566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_stdout", &use_stdout, NULL));
70a297a907SKarl Rupp   if (use_stdout) PETSC_STDERR = PETSC_STDOUT;
719566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_none", &use_none, NULL));
72a297a907SKarl Rupp   if (use_none) PetscErrorPrintf = PetscErrorPrintfNone;
73107894f0SSatish Balay   PetscErrorPrintfInitializeCalled = PETSC_TRUE;
743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
75107894f0SSatish Balay }
76107894f0SSatish Balay 
PetscErrorPrintfNone(const char format[],...)77d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscErrorPrintfNone(const char format[], ...)
78d71ae5a4SJacob Faibussowitsch {
793ba16761SJacob Faibussowitsch   return PETSC_SUCCESS;
80e8fb0fc0SBarry Smith }
81e8fb0fc0SBarry Smith 
PetscErrorPrintfDefault(const char format[],...)82d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscErrorPrintfDefault(const char format[], ...)
83d71ae5a4SJacob Faibussowitsch {
84e8fb0fc0SBarry Smith   va_list          Argp;
85ace3abfcSBarry Smith   static PetscBool PetscErrorPrintfCalled = PETSC_FALSE;
86e8fb0fc0SBarry Smith 
87e8fb0fc0SBarry Smith   /*
88e8fb0fc0SBarry Smith       This function does not call PetscFunctionBegin and PetscFunctionReturn() because
89e8fb0fc0SBarry Smith     it may be called by PetscStackView().
90e8fb0fc0SBarry Smith 
91e8fb0fc0SBarry Smith       This function does not do error checking because it is called by the error handlers.
92e8fb0fc0SBarry Smith   */
93e8fb0fc0SBarry Smith 
94e8fb0fc0SBarry Smith   if (!PetscErrorPrintfCalled) {
95e8fb0fc0SBarry Smith     PetscErrorPrintfCalled = PETSC_TRUE;
96e8fb0fc0SBarry Smith 
97e8fb0fc0SBarry Smith     /*
98e8fb0fc0SBarry Smith         On the SGI machines and Cray T3E, if errors are generated  "simultaneously" by
99e8fb0fc0SBarry Smith       different processors, the messages are printed all jumbled up; to try to
100e8fb0fc0SBarry Smith       prevent this we have each processor wait based on their rank
101e8fb0fc0SBarry Smith     */
102e8fb0fc0SBarry Smith #if defined(PETSC_CAN_SLEEP_AFTER_ERROR)
103e8fb0fc0SBarry Smith     {
1043ba16761SJacob Faibussowitsch       PetscMPIInt rank = PetscGlobalRank > 8 ? 8 : PetscGlobalRank;
105dd460d27SBarry Smith       (void)PetscSleep((PetscReal)rank);
106e8fb0fc0SBarry Smith     }
107e8fb0fc0SBarry Smith #endif
108e8fb0fc0SBarry Smith   }
109e8fb0fc0SBarry Smith 
110dd460d27SBarry Smith   (void)PetscFPrintf(PETSC_COMM_SELF, PETSC_STDERR, "[%d]PETSC ERROR: ", PetscGlobalRank);
111e8fb0fc0SBarry Smith   va_start(Argp, format);
112dd460d27SBarry Smith   (void)(*PetscVFPrintf)(PETSC_STDERR, format, Argp);
113e8fb0fc0SBarry Smith   va_end(Argp);
1143ba16761SJacob Faibussowitsch   return PETSC_SUCCESS;
115e8fb0fc0SBarry Smith }
116e8fb0fc0SBarry Smith 
/*
   PetscErrorPrintfHilight - Emits the terminal escape sequence "\033[1;31m" on
   PETSC_STDERR so that what follows is highlighted.

   On some systems when the stderr is nested through several levels of shell script
   before being passed to a file the isatty() falsely returns true, resulting in
   the screen highlight variables being passed through the test harness. Therefore
   nothing is emitted when PETSC_STDERR is PETSC_STDOUT. Nothing is emitted either
   when a non-default PetscErrorPrintf is installed or when the stream is not a tty.

   Compiles to a no-op unless both PETSC_HAVE_UNISTD_H and PETSC_USE_ISATTY are defined.
*/
static void PetscErrorPrintfHilight(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (!isatty(fileno(PETSC_STDERR))) return;
  fprintf(PETSC_STDERR, "\033[1;31m");
#endif
}
131114011d0SBarry Smith 
/*
   PetscErrorPrintfNormal - Emits the terminal escape sequences "\033[0;39m\033[0;49m"
   on PETSC_STDERR, under the same conditions as the highlighting routine: the default
   PetscErrorPrintf is installed, PETSC_STDERR is not PETSC_STDOUT, and the stream is a tty.

   Compiles to a no-op unless both PETSC_HAVE_UNISTD_H and PETSC_USE_ISATTY are defined.
*/
static void PetscErrorPrintfNormal(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (!isatty(fileno(PETSC_STDERR))) return;
  fprintf(PETSC_STDERR, "\033[0;39m\033[0;49m");
#endif
}
140114011d0SBarry Smith 
14195c0884eSLisandro Dalcin PETSC_EXTERN PetscErrorCode PetscOptionsViewError(void);
142114011d0SBarry Smith 
14335f00c14SToby Isaac static PETSC_TLS PetscBool petsc_traceback_error_silent = PETSC_FALSE;
14435f00c14SToby Isaac 
145e5c89e4eSSatish Balay /*@C
146e5c89e4eSSatish Balay 
147e5c89e4eSSatish Balay   PetscTraceBackErrorHandler - Default error handler routine that generates
148e5c89e4eSSatish Balay   a traceback on error detection.
149e5c89e4eSSatish Balay 
150cc4c1da9SBarry Smith   Not Collective, No Fortran Support
151e5c89e4eSSatish Balay 
152e5c89e4eSSatish Balay   Input Parameters:
153e32f2f54SBarry Smith + comm - communicator over which error occurred
1546e25c4a1SBarry Smith . line - the line number of the error (usually indicated by `__LINE__` in the calling routine)
15510450e9eSJacob Faibussowitsch . fun  - the function name
1566e25c4a1SBarry Smith . file - the file in which the error was detected (usually indicated by `__FILE__` in the calling routine)
157e5c89e4eSSatish Balay . mess - an error text string, usually just printed to the screen
158e5c89e4eSSatish Balay . n    - the generic error number
159811af0c4SBarry Smith . p    - `PETSC_ERROR_INITIAL` if this is the first call the error handler, otherwise `PETSC_ERROR_REPEAT`
160e5c89e4eSSatish Balay - ctx  - error handler context
161e5c89e4eSSatish Balay 
162811af0c4SBarry Smith   Options Database Keys:
1636e25c4a1SBarry Smith + -error_output_stdout - output the error messages to `stdout` instead of the default `stderr`
16445b666d6SBarry Smith - -error_output_none   - do not output the error messages
165e5c89e4eSSatish Balay 
166e5c89e4eSSatish Balay   Notes:
167811af0c4SBarry Smith   Users do not directly call this routine
168e5c89e4eSSatish Balay 
169811af0c4SBarry Smith   Use `PetscPushErrorHandler()` to set the desired error handler.
170e5c89e4eSSatish Balay 
17145b666d6SBarry Smith   Level: developer
172e5c89e4eSSatish Balay 
173db781477SPatrick Sanan .seealso: `PetscError()`, `PetscPushErrorHandler()`, `PetscPopErrorHandler()`, `PetscAttachDebuggerErrorHandler()`,
1746e25c4a1SBarry Smith           `PetscAbortErrorHandler()`, `PetscMPIAbortErrorHandler()`, `PetscReturnErrorHandler()`, `PetscEmacsClientErrorHandler()`,
1756e25c4a1SBarry Smith            `PETSC_ERROR_INITIAL`, `PETSC_ERROR_REPEAT`, `PetscErrorCode`, `PetscErrorType`
176e5c89e4eSSatish Balay  @*/
PetscTraceBackErrorHandler(MPI_Comm comm,int line,const char * fun,const char * file,PetscErrorCode n,PetscErrorType p,const char * mess,PetscCtx ctx)177*2a8381b2SBarry Smith PetscErrorCode PetscTraceBackErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, PetscCtx ctx)
178d71ae5a4SJacob Faibussowitsch {
179997adca8SBarry Smith   PetscMPIInt rank = 0;
180e5c89e4eSSatish Balay 
18110450e9eSJacob Faibussowitsch   (void)ctx;
182a297a907SKarl Rupp   if (comm != PETSC_COMM_SELF) MPI_Comm_rank(comm, &rank);
183a297a907SKarl Rupp 
18435f00c14SToby Isaac   // reinitialize the error handler when a new initializing error is detected
18535f00c14SToby Isaac   if (p != PETSC_ERROR_REPEAT) {
18635f00c14SToby Isaac     petsc_traceback_error_silent = PETSC_FALSE;
18735f00c14SToby Isaac     if (PetscCIEnabledPortableErrorOutput) {
18835f00c14SToby Isaac       PetscMPIInt size = 1;
18935f00c14SToby Isaac 
19035f00c14SToby Isaac       if (comm != MPI_COMM_NULL) MPI_Comm_size(comm, &size);
19135f00c14SToby Isaac       petscabortmpifinalize = (size == PetscGlobalSize) ? PETSC_TRUE : PETSC_FALSE;
19235f00c14SToby Isaac     }
19335f00c14SToby Isaac   }
19435f00c14SToby Isaac 
19535f00c14SToby Isaac   if (rank == 0 && (!PetscCIEnabledPortableErrorOutput || PetscGlobalRank == 0) && (p != PETSC_ERROR_REPEAT || !petsc_traceback_error_silent)) {
196114011d0SBarry Smith     static int cnt    = 1;
1974237731aSLisandro Dalcin     PetscBool  python = (n == PETSC_ERR_PYTHON && cnt == 1) ? PETSC_TRUE : PETSC_FALSE;
198114011d0SBarry Smith 
1994237731aSLisandro Dalcin     if (p == PETSC_ERROR_INITIAL || python) {
200114011d0SBarry Smith       PetscErrorPrintfHilight();
201dd460d27SBarry Smith       (void)(*PetscErrorPrintf)("--------------------- Error Message --------------------------------------------------------------\n");
2025f3a2c8dSBarry Smith       PetscErrorPrintfNormal();
2034e29e845SStefano Zampini       if (cnt > 1) {
204dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  It appears a new error in the code was triggered after a previous error, possibly because:\n");
205dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  -  The first error was not properly handled via (for example) the use of\n");
206dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("     PetscCall(TheFunctionThatErrors()); or\n");
207dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  -  The second error was triggered while handling the first error.\n");
208dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  Above is the traceback for the previous unhandled error, below the traceback for the next error\n");
209dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  ALL ERRORS in the PETSc libraries are fatal, you should add the appropriate error checking to the code\n");
2104e29e845SStefano Zampini         cnt = 1;
2114e29e845SStefano Zampini       }
2124e29e845SStefano Zampini     }
2134e29e845SStefano Zampini     if (cnt == 1) {
214dd460d27SBarry Smith       if (n == PETSC_ERR_MEM || n == PETSC_ERR_MEM_LEAK) (void)PetscErrorMemoryMessage(n);
215a297a907SKarl Rupp       else {
216e5c89e4eSSatish Balay         const char *text;
217dd460d27SBarry Smith         (void)PetscErrorMessage(n, &text, NULL);
218dd460d27SBarry Smith         if (text) (void)(*PetscErrorPrintf)("%s\n", text);
219e5c89e4eSSatish Balay       }
2204237731aSLisandro Dalcin       if (python) (void)PetscPythonPrintError();
2214237731aSLisandro Dalcin       else if (mess) (void)(*PetscErrorPrintf)("%s\n", mess);
2223187aabaSJunchao Zhang #if defined(PETSC_PKG_CUDA_MIN_ARCH)
2233187aabaSJunchao Zhang       int confCudaArch = PETSC_PKG_CUDA_MIN_ARCH;    // if PETSc was configured with numbered CUDA arches, get the min arch.
224456dbbcdSJunchao Zhang       int runCudaArch  = PetscDeviceCUPMRuntimeArch; // 0 indicates the code has never initialized a cuda device.
225456dbbcdSJunchao Zhang       if (runCudaArch && confCudaArch > runCudaArch) {
226dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("WARNING! Run on a CUDA device with GPU architecture %d, but PETSc was configured with a minimal GPU architecture %d.\n", runCudaArch, confCudaArch);
227dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("If it is a cudaErrorNoKernelImageForDevice error, you may need to reconfigure PETSc with --with-cuda-arch=%d or --with-cuda-arch=%d,%d\n", runCudaArch, runCudaArch, confCudaArch);
228456dbbcdSJunchao Zhang       }
229456dbbcdSJunchao Zhang #endif
230dd460d27SBarry Smith       (void)PetscOptionsLeftError();
231dd460d27SBarry Smith       (void)(*PetscErrorPrintf)("See https://petsc.org/release/faq/ for trouble shooting.\n");
232660278c0SBarry Smith       if (!PetscCIEnabledPortableErrorOutput) {
23336ff62a9SBarry Smith         size_t clen;
23436ff62a9SBarry Smith 
235dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("%s\n", version);
236dd460d27SBarry Smith         if (PetscErrorPrintfInitializeCalled) (void)(*PetscErrorPrintf)("%s with %d MPI process(es) and PETSC_ARCH %s on %s by %s %s\n", pname, PetscGlobalSize, arch, hostname, username, date);
237dd460d27SBarry Smith         (void)PetscStrlen(petscconfigureoptions, &clen);
238dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("Configure options: %s\n", clen ? petscconfigureoptions : "none used");
239107894f0SSatish Balay       }
240660278c0SBarry Smith     }
241997adca8SBarry Smith     /* print line of stack trace */
242dd460d27SBarry Smith     if (fun) (void)(*PetscErrorPrintf)("#%d %s() at %s:%d\n", cnt++, fun, PetscCIFilename(file), PetscCILinenumber(line));
243dd460d27SBarry Smith     else if (file) (void)(*PetscErrorPrintf)("#%d %s:%d\n", cnt++, PetscCIFilename(file), PetscCILinenumber(line));
24449c86fc7SBarry Smith     if (fun) {
245bbcf679cSJacob Faibussowitsch       PetscBool ismain = PETSC_FALSE;
246bbcf679cSJacob Faibussowitsch 
247dd460d27SBarry Smith       (void)PetscStrncmp(fun, "main", 4, &ismain);
248fbfcfee5SBarry Smith       if (ismain) {
249dd460d27SBarry Smith         if ((n <= PETSC_ERR_MIN_VALUE) || (n >= PETSC_ERR_MAX_VALUE)) (void)(*PetscErrorPrintf)("Reached the main program with an out-of-range error code %d. This should never happen\n", n);
250dd460d27SBarry Smith         (void)PetscOptionsViewError();
251114011d0SBarry Smith         PetscErrorPrintfHilight();
252dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("----------------End of Error Message -------send entire error message to petsc-maint@mcs.anl.gov----------\n");
253114011d0SBarry Smith         PetscErrorPrintfNormal();
254114011d0SBarry Smith       }
25549c86fc7SBarry Smith     }
256997adca8SBarry Smith   } else {
25735f00c14SToby Isaac     // silence this process's stacktrace if it is not the root of an originating error
25835f00c14SToby Isaac     if (p != PETSC_ERROR_REPEAT && rank) petsc_traceback_error_silent = PETSC_TRUE;
25935f00c14SToby Isaac     if (fun) {
26035f00c14SToby Isaac       PetscBool ismain = PETSC_FALSE;
26135f00c14SToby Isaac 
262dd460d27SBarry Smith       (void)PetscStrncmp(fun, "main", 4, &ismain);
26335f00c14SToby Isaac       if (ismain && petsc_traceback_error_silent) {
26435f00c14SToby Isaac         /* This results from PetscError() being called in main: PETSCABORT()
26535f00c14SToby Isaac            will be called after the error handler.  But this thread is not the
26635f00c14SToby Isaac            root rank of the communicator that initialized the error.  So sleep
26735f00c14SToby Isaac            to allow the root thread to finish its printing.
26835f00c14SToby Isaac 
26935f00c14SToby Isaac            (Unless this is running CI, in which case do not sleep because
27035f00c14SToby Isaac            we expect all processes to call MPI_Finalize() and make a clean
27135f00c14SToby Isaac            exit.) */
272dd460d27SBarry Smith         if (!PetscCIEnabledPortableErrorOutput) (void)PetscSleep(10.0);
27335f00c14SToby Isaac       }
27435f00c14SToby Isaac     }
275997adca8SBarry Smith   }
276362febeeSStefano Zampini   return n;
277e5c89e4eSSatish Balay }
278