xref: /petsc/src/sys/error/errtrace.c (revision 4237731afe36b4db0a3147aff0aed51d4657afa2) !
10039db0dSBarry Smith #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for fileno() */
2c6db04a5SJed Brown #include <petscsys.h>                    /*I "petscsys.h" I*/
3f67a399dSBarry Smith #include <petsc/private/petscimpl.h>
4c6db04a5SJed Brown #include <petscconfiginfo.h>
5114011d0SBarry Smith #if defined(PETSC_HAVE_UNISTD_H)
6114011d0SBarry Smith   #include <unistd.h>
7114011d0SBarry Smith #endif
89beb8f72SToby Isaac #include "err.h"
935f00c14SToby Isaac #include <petsc/private/logimpl.h> // PETSC_TLS
10e5c89e4eSSatish Balay 
11456dbbcdSJunchao Zhang #if defined(PETSC_HAVE_CUPM)
12456dbbcdSJunchao Zhang   #include <petsc/private/deviceimpl.h>
13456dbbcdSJunchao Zhang #endif
14456dbbcdSJunchao Zhang 
15e5c89e4eSSatish Balay /*@C
16aaa8cc7dSPierre Jolivet   PetscIgnoreErrorHandler - Deprecated, use `PetscReturnErrorHandler()`. Ignores the error, allows program to continue as if error did not occur
17e5c89e4eSSatish Balay 
18cc4c1da9SBarry Smith   Not Collective, No Fortran Support
19e5c89e4eSSatish Balay 
20e5c89e4eSSatish Balay   Input Parameters:
21e32f2f54SBarry Smith + comm - communicator over which error occurred
22e32f2f54SBarry Smith . line - the line number of the error (indicated by __LINE__)
2310450e9eSJacob Faibussowitsch . fun  - the function name
24e5c89e4eSSatish Balay . file - the file in which the error was detected (indicated by __FILE__)
25e5c89e4eSSatish Balay . mess - an error text string, usually just printed to the screen
26e5c89e4eSSatish Balay . n    - the generic error number
27e5c89e4eSSatish Balay . p    - specific error number
28e5c89e4eSSatish Balay - ctx  - error handler context
29e5c89e4eSSatish Balay 
30e5c89e4eSSatish Balay   Level: developer
31e5c89e4eSSatish Balay 
32811af0c4SBarry Smith   Note:
33811af0c4SBarry Smith   Users do not directly call this routine
34e5c89e4eSSatish Balay 
35db781477SPatrick Sanan .seealso: `PetscReturnErrorHandler()`
36e5c89e4eSSatish Balay  @*/
37d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscIgnoreErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, void *ctx)
38d71ae5a4SJacob Faibussowitsch {
3910450e9eSJacob Faibussowitsch   (void)comm;
4010450e9eSJacob Faibussowitsch   (void)line;
4110450e9eSJacob Faibussowitsch   (void)fun;
4210450e9eSJacob Faibussowitsch   (void)file;
4310450e9eSJacob Faibussowitsch   (void)p;
4410450e9eSJacob Faibussowitsch   (void)mess;
4510450e9eSJacob Faibussowitsch   (void)ctx;
4611cc89d2SBarry Smith   return n;
47e5c89e4eSSatish Balay }
48e5c89e4eSSatish Balay 
49107894f0SSatish Balay /* ---------------------------------------------------------------------------------------*/
50107894f0SSatish Balay 
515abee1b0SJed Brown static char      arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
52ace3abfcSBarry Smith static PetscBool PetscErrorPrintfInitializeCalled = PETSC_FALSE;
533f6e4ae9SSatish Balay static char      version[256];
54107894f0SSatish Balay 
55107894f0SSatish Balay /*
56107894f0SSatish Balay    Initializes arch, hostname, username, date so that system calls do NOT need
57107894f0SSatish Balay    to be made during the error handler.
58107894f0SSatish Balay */
59d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscErrorPrintfInitialize(void)
60d71ae5a4SJacob Faibussowitsch {
61ace3abfcSBarry Smith   PetscBool use_stdout = PETSC_FALSE, use_none = PETSC_FALSE;
62107894f0SSatish Balay 
63107894f0SSatish Balay   PetscFunctionBegin;
649566063dSJacob Faibussowitsch   PetscCall(PetscGetArchType(arch, sizeof(arch)));
659566063dSJacob Faibussowitsch   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
669566063dSJacob Faibussowitsch   PetscCall(PetscGetUserName(username, sizeof(username)));
679566063dSJacob Faibussowitsch   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
689566063dSJacob Faibussowitsch   PetscCall(PetscGetDate(date, sizeof(date)));
699566063dSJacob Faibussowitsch   PetscCall(PetscGetVersion(version, sizeof(version)));
70e8fb0fc0SBarry Smith 
719566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_stdout", &use_stdout, NULL));
72a297a907SKarl Rupp   if (use_stdout) PETSC_STDERR = PETSC_STDOUT;
739566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_none", &use_none, NULL));
74a297a907SKarl Rupp   if (use_none) PetscErrorPrintf = PetscErrorPrintfNone;
75107894f0SSatish Balay   PetscErrorPrintfInitializeCalled = PETSC_TRUE;
763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
77107894f0SSatish Balay }
78107894f0SSatish Balay 
79d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscErrorPrintfNone(const char format[], ...)
80d71ae5a4SJacob Faibussowitsch {
813ba16761SJacob Faibussowitsch   return PETSC_SUCCESS;
82e8fb0fc0SBarry Smith }
83e8fb0fc0SBarry Smith 
84d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscErrorPrintfDefault(const char format[], ...)
85d71ae5a4SJacob Faibussowitsch {
86e8fb0fc0SBarry Smith   va_list          Argp;
87ace3abfcSBarry Smith   static PetscBool PetscErrorPrintfCalled = PETSC_FALSE;
88e8fb0fc0SBarry Smith 
89e8fb0fc0SBarry Smith   /*
90e8fb0fc0SBarry Smith       This function does not call PetscFunctionBegin and PetscFunctionReturn() because
91e8fb0fc0SBarry Smith     it may be called by PetscStackView().
92e8fb0fc0SBarry Smith 
93e8fb0fc0SBarry Smith       This function does not do error checking because it is called by the error handlers.
94e8fb0fc0SBarry Smith   */
95e8fb0fc0SBarry Smith 
96e8fb0fc0SBarry Smith   if (!PetscErrorPrintfCalled) {
97e8fb0fc0SBarry Smith     PetscErrorPrintfCalled = PETSC_TRUE;
98e8fb0fc0SBarry Smith 
99e8fb0fc0SBarry Smith     /*
100e8fb0fc0SBarry Smith         On the SGI machines and Cray T3E, if errors are generated  "simultaneously" by
101e8fb0fc0SBarry Smith       different processors, the messages are printed all jumbled up; to try to
102e8fb0fc0SBarry Smith       prevent this we have each processor wait based on their rank
103e8fb0fc0SBarry Smith     */
104e8fb0fc0SBarry Smith #if defined(PETSC_CAN_SLEEP_AFTER_ERROR)
105e8fb0fc0SBarry Smith     {
1063ba16761SJacob Faibussowitsch       PetscMPIInt rank = PetscGlobalRank > 8 ? 8 : PetscGlobalRank;
107dd460d27SBarry Smith       (void)PetscSleep((PetscReal)rank);
108e8fb0fc0SBarry Smith     }
109e8fb0fc0SBarry Smith #endif
110e8fb0fc0SBarry Smith   }
111e8fb0fc0SBarry Smith 
112dd460d27SBarry Smith   (void)PetscFPrintf(PETSC_COMM_SELF, PETSC_STDERR, "[%d]PETSC ERROR: ", PetscGlobalRank);
113e8fb0fc0SBarry Smith   va_start(Argp, format);
114dd460d27SBarry Smith   (void)(*PetscVFPrintf)(PETSC_STDERR, format, Argp);
115e8fb0fc0SBarry Smith   va_end(Argp);
1163ba16761SJacob Faibussowitsch   return PETSC_SUCCESS;
117e8fb0fc0SBarry Smith }
118e8fb0fc0SBarry Smith 
/*
   On some systems when the stderr is nested through several levels of shell script
   before being passed to a file the isatty() falsely returns true resulting in
   the screen highlight variables being passed through the test harness. Therefore
   simply do not highlight when the PETSC_STDERR is PETSC_STDOUT.

   PetscErrorPrintfHilight - switches the terminal to the highlight color (escape sequence
   "\033[1;31m") when the default error printer is active and PETSC_STDERR is a terminal.
   Compiles to a no-op unless both PETSC_HAVE_UNISTD_H and PETSC_USE_ISATTY are defined.
*/
static void PetscErrorPrintfHilight(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  /* only decorate output produced by the default printer, and never when errors go to stdout */
  if (PetscErrorPrintf != PetscErrorPrintfDefault || PETSC_STDERR == PETSC_STDOUT) return;
  if (isatty(fileno(PETSC_STDERR))) fprintf(PETSC_STDERR, "\033[1;31m");
#endif
}
133114011d0SBarry Smith 
/*
   PetscErrorPrintfNormal - counterpart of PetscErrorPrintfHilight(): emits the escape
   sequences "\033[0;39m\033[0;49m" to PETSC_STDERR under the same conditions (default error
   printer active, PETSC_STDERR distinct from PETSC_STDOUT and attached to a terminal).
   Compiles to a no-op unless both PETSC_HAVE_UNISTD_H and PETSC_USE_ISATTY are defined.
*/
static void PetscErrorPrintfNormal(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  /* only decorate output produced by the default printer, and never when errors go to stdout */
  if (PetscErrorPrintf != PetscErrorPrintfDefault || PETSC_STDERR == PETSC_STDOUT) return;
  if (isatty(fileno(PETSC_STDERR))) fprintf(PETSC_STDERR, "\033[0;39m\033[0;49m");
#endif
}
142114011d0SBarry Smith 
14395c0884eSLisandro Dalcin PETSC_EXTERN PetscErrorCode PetscOptionsViewError(void);
144114011d0SBarry Smith 
14535f00c14SToby Isaac static PETSC_TLS PetscBool petsc_traceback_error_silent = PETSC_FALSE;
14635f00c14SToby Isaac 
147e5c89e4eSSatish Balay /*@C
148e5c89e4eSSatish Balay 
149e5c89e4eSSatish Balay   PetscTraceBackErrorHandler - Default error handler routine that generates
150e5c89e4eSSatish Balay   a traceback on error detection.
151e5c89e4eSSatish Balay 
152cc4c1da9SBarry Smith   Not Collective, No Fortran Support
153e5c89e4eSSatish Balay 
154e5c89e4eSSatish Balay   Input Parameters:
155e32f2f54SBarry Smith + comm - communicator over which error occurred
1566e25c4a1SBarry Smith . line - the line number of the error (usually indicated by `__LINE__` in the calling routine)
15710450e9eSJacob Faibussowitsch . fun  - the function name
1586e25c4a1SBarry Smith . file - the file in which the error was detected (usually indicated by `__FILE__` in the calling routine)
159e5c89e4eSSatish Balay . mess - an error text string, usually just printed to the screen
160e5c89e4eSSatish Balay . n    - the generic error number
161811af0c4SBarry Smith . p    - `PETSC_ERROR_INITIAL` if this is the first call the error handler, otherwise `PETSC_ERROR_REPEAT`
162e5c89e4eSSatish Balay - ctx  - error handler context
163e5c89e4eSSatish Balay 
164811af0c4SBarry Smith   Options Database Keys:
1656e25c4a1SBarry Smith + -error_output_stdout - output the error messages to `stdout` instead of the default `stderr`
16645b666d6SBarry Smith - -error_output_none   - do not output the error messages
167e5c89e4eSSatish Balay 
168e5c89e4eSSatish Balay   Notes:
169811af0c4SBarry Smith   Users do not directly call this routine
170e5c89e4eSSatish Balay 
171811af0c4SBarry Smith   Use `PetscPushErrorHandler()` to set the desired error handler.
172e5c89e4eSSatish Balay 
17345b666d6SBarry Smith   Level: developer
174e5c89e4eSSatish Balay 
175db781477SPatrick Sanan .seealso: `PetscError()`, `PetscPushErrorHandler()`, `PetscPopErrorHandler()`, `PetscAttachDebuggerErrorHandler()`,
1766e25c4a1SBarry Smith           `PetscAbortErrorHandler()`, `PetscMPIAbortErrorHandler()`, `PetscReturnErrorHandler()`, `PetscEmacsClientErrorHandler()`,
1776e25c4a1SBarry Smith            `PETSC_ERROR_INITIAL`, `PETSC_ERROR_REPEAT`, `PetscErrorCode`, `PetscErrorType`
178e5c89e4eSSatish Balay  @*/
179d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscTraceBackErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, void *ctx)
180d71ae5a4SJacob Faibussowitsch {
181997adca8SBarry Smith   PetscMPIInt rank = 0;
182e5c89e4eSSatish Balay 
18310450e9eSJacob Faibussowitsch   (void)ctx;
184a297a907SKarl Rupp   if (comm != PETSC_COMM_SELF) MPI_Comm_rank(comm, &rank);
185a297a907SKarl Rupp 
18635f00c14SToby Isaac   // reinitialize the error handler when a new initializing error is detected
18735f00c14SToby Isaac   if (p != PETSC_ERROR_REPEAT) {
18835f00c14SToby Isaac     petsc_traceback_error_silent = PETSC_FALSE;
18935f00c14SToby Isaac     if (PetscCIEnabledPortableErrorOutput) {
19035f00c14SToby Isaac       PetscMPIInt size = 1;
19135f00c14SToby Isaac 
19235f00c14SToby Isaac       if (comm != MPI_COMM_NULL) MPI_Comm_size(comm, &size);
19335f00c14SToby Isaac       petscabortmpifinalize = (size == PetscGlobalSize) ? PETSC_TRUE : PETSC_FALSE;
19435f00c14SToby Isaac     }
19535f00c14SToby Isaac   }
19635f00c14SToby Isaac 
19735f00c14SToby Isaac   if (rank == 0 && (!PetscCIEnabledPortableErrorOutput || PetscGlobalRank == 0) && (p != PETSC_ERROR_REPEAT || !petsc_traceback_error_silent)) {
198114011d0SBarry Smith     static int cnt    = 1;
199*4237731aSLisandro Dalcin     PetscBool  python = (n == PETSC_ERR_PYTHON && cnt == 1) ? PETSC_TRUE : PETSC_FALSE;
200114011d0SBarry Smith 
201*4237731aSLisandro Dalcin     if (p == PETSC_ERROR_INITIAL || python) {
202114011d0SBarry Smith       PetscErrorPrintfHilight();
203dd460d27SBarry Smith       (void)(*PetscErrorPrintf)("--------------------- Error Message --------------------------------------------------------------\n");
2045f3a2c8dSBarry Smith       PetscErrorPrintfNormal();
2054e29e845SStefano Zampini       if (cnt > 1) {
206dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  It appears a new error in the code was triggered after a previous error, possibly because:\n");
207dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  -  The first error was not properly handled via (for example) the use of\n");
208dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("     PetscCall(TheFunctionThatErrors()); or\n");
209dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  -  The second error was triggered while handling the first error.\n");
210dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  Above is the traceback for the previous unhandled error, below the traceback for the next error\n");
211dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("  ALL ERRORS in the PETSc libraries are fatal, you should add the appropriate error checking to the code\n");
2124e29e845SStefano Zampini         cnt = 1;
2134e29e845SStefano Zampini       }
2144e29e845SStefano Zampini     }
2154e29e845SStefano Zampini     if (cnt == 1) {
216dd460d27SBarry Smith       if (n == PETSC_ERR_MEM || n == PETSC_ERR_MEM_LEAK) (void)PetscErrorMemoryMessage(n);
217a297a907SKarl Rupp       else {
218e5c89e4eSSatish Balay         const char *text;
219dd460d27SBarry Smith         (void)PetscErrorMessage(n, &text, NULL);
220dd460d27SBarry Smith         if (text) (void)(*PetscErrorPrintf)("%s\n", text);
221e5c89e4eSSatish Balay       }
222*4237731aSLisandro Dalcin       if (python) (void)PetscPythonPrintError();
223*4237731aSLisandro Dalcin       else if (mess) (void)(*PetscErrorPrintf)("%s\n", mess);
2243187aabaSJunchao Zhang #if defined(PETSC_PKG_CUDA_MIN_ARCH)
2253187aabaSJunchao Zhang       int confCudaArch = PETSC_PKG_CUDA_MIN_ARCH;    // if PETSc was configured with numbered CUDA arches, get the min arch.
226456dbbcdSJunchao Zhang       int runCudaArch  = PetscDeviceCUPMRuntimeArch; // 0 indicates the code has never initialized a cuda device.
227456dbbcdSJunchao Zhang       if (runCudaArch && confCudaArch > runCudaArch) {
228dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("WARNING! Run on a CUDA device with GPU architecture %d, but PETSc was configured with a minimal GPU architecture %d.\n", runCudaArch, confCudaArch);
229dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("If it is a cudaErrorNoKernelImageForDevice error, you may need to reconfigure PETSc with --with-cuda-arch=%d or --with-cuda-arch=%d,%d\n", runCudaArch, runCudaArch, confCudaArch);
230456dbbcdSJunchao Zhang       }
231456dbbcdSJunchao Zhang #endif
232dd460d27SBarry Smith       (void)PetscOptionsLeftError();
233dd460d27SBarry Smith       (void)(*PetscErrorPrintf)("See https://petsc.org/release/faq/ for trouble shooting.\n");
234660278c0SBarry Smith       if (!PetscCIEnabledPortableErrorOutput) {
23536ff62a9SBarry Smith         size_t clen;
23636ff62a9SBarry Smith 
237dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("%s\n", version);
238dd460d27SBarry Smith         if (PetscErrorPrintfInitializeCalled) (void)(*PetscErrorPrintf)("%s with %d MPI process(es) and PETSC_ARCH %s on %s by %s %s\n", pname, PetscGlobalSize, arch, hostname, username, date);
239dd460d27SBarry Smith         (void)PetscStrlen(petscconfigureoptions, &clen);
240dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("Configure options: %s\n", clen ? petscconfigureoptions : "none used");
241107894f0SSatish Balay       }
242660278c0SBarry Smith     }
243997adca8SBarry Smith     /* print line of stack trace */
244dd460d27SBarry Smith     if (fun) (void)(*PetscErrorPrintf)("#%d %s() at %s:%d\n", cnt++, fun, PetscCIFilename(file), PetscCILinenumber(line));
245dd460d27SBarry Smith     else if (file) (void)(*PetscErrorPrintf)("#%d %s:%d\n", cnt++, PetscCIFilename(file), PetscCILinenumber(line));
24649c86fc7SBarry Smith     if (fun) {
247bbcf679cSJacob Faibussowitsch       PetscBool ismain = PETSC_FALSE;
248bbcf679cSJacob Faibussowitsch 
249dd460d27SBarry Smith       (void)PetscStrncmp(fun, "main", 4, &ismain);
250fbfcfee5SBarry Smith       if (ismain) {
251dd460d27SBarry Smith         if ((n <= PETSC_ERR_MIN_VALUE) || (n >= PETSC_ERR_MAX_VALUE)) (void)(*PetscErrorPrintf)("Reached the main program with an out-of-range error code %d. This should never happen\n", n);
252dd460d27SBarry Smith         (void)PetscOptionsViewError();
253114011d0SBarry Smith         PetscErrorPrintfHilight();
254dd460d27SBarry Smith         (void)(*PetscErrorPrintf)("----------------End of Error Message -------send entire error message to petsc-maint@mcs.anl.gov----------\n");
255114011d0SBarry Smith         PetscErrorPrintfNormal();
256114011d0SBarry Smith       }
25749c86fc7SBarry Smith     }
258997adca8SBarry Smith   } else {
25935f00c14SToby Isaac     // silence this process's stacktrace if it is not the root of an originating error
26035f00c14SToby Isaac     if (p != PETSC_ERROR_REPEAT && rank) petsc_traceback_error_silent = PETSC_TRUE;
26135f00c14SToby Isaac     if (fun) {
26235f00c14SToby Isaac       PetscBool ismain = PETSC_FALSE;
26335f00c14SToby Isaac 
264dd460d27SBarry Smith       (void)PetscStrncmp(fun, "main", 4, &ismain);
26535f00c14SToby Isaac       if (ismain && petsc_traceback_error_silent) {
26635f00c14SToby Isaac         /* This results from PetscError() being called in main: PETSCABORT()
26735f00c14SToby Isaac            will be called after the error handler.  But this thread is not the
26835f00c14SToby Isaac            root rank of the communicator that initialized the error.  So sleep
26935f00c14SToby Isaac            to allow the root thread to finish its printing.
27035f00c14SToby Isaac 
27135f00c14SToby Isaac            (Unless this is running CI, in which case do not sleep because
27235f00c14SToby Isaac            we expect all processes to call MPI_Finalize() and make a clean
27335f00c14SToby Isaac            exit.) */
274dd460d27SBarry Smith         if (!PetscCIEnabledPortableErrorOutput) (void)PetscSleep(10.0);
27535f00c14SToby Isaac       }
27635f00c14SToby Isaac     }
277997adca8SBarry Smith   }
278362febeeSStefano Zampini   return n;
279e5c89e4eSSatish Balay }
280