xref: /petsc/src/sys/error/errtrace.c (revision cc4c1da905d89950b196b027190941013bd3e15a)
10039db0dSBarry Smith #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for fileno() */
2c6db04a5SJed Brown #include <petscsys.h>                    /*I "petscsys.h" I*/
3f67a399dSBarry Smith #include <petsc/private/petscimpl.h>
4c6db04a5SJed Brown #include <petscconfiginfo.h>
5114011d0SBarry Smith #if defined(PETSC_HAVE_UNISTD_H)
6114011d0SBarry Smith   #include <unistd.h>
7114011d0SBarry Smith #endif
89beb8f72SToby Isaac #include "err.h"
935f00c14SToby Isaac #include <petsc/private/logimpl.h> // PETSC_TLS
10e5c89e4eSSatish Balay 
11456dbbcdSJunchao Zhang #if defined(PETSC_HAVE_CUPM)
12456dbbcdSJunchao Zhang   #include <petsc/private/deviceimpl.h>
13456dbbcdSJunchao Zhang #endif
14456dbbcdSJunchao Zhang 
15e5c89e4eSSatish Balay /*@C
16aaa8cc7dSPierre Jolivet   PetscIgnoreErrorHandler - Deprecated, use `PetscReturnErrorHandler()`. Ignores the error, allows program to continue as if error did not occur
17e5c89e4eSSatish Balay 
18*cc4c1da9SBarry Smith   Not Collective, No Fortran Support
19e5c89e4eSSatish Balay 
20e5c89e4eSSatish Balay   Input Parameters:
21e32f2f54SBarry Smith + comm - communicator over which error occurred
22e32f2f54SBarry Smith . line - the line number of the error (indicated by __LINE__)
2310450e9eSJacob Faibussowitsch . fun  - the function name
24e5c89e4eSSatish Balay . file - the file in which the error was detected (indicated by __FILE__)
25e5c89e4eSSatish Balay . mess - an error text string, usually just printed to the screen
26e5c89e4eSSatish Balay . n    - the generic error number
27e5c89e4eSSatish Balay . p    - specific error number
28e5c89e4eSSatish Balay - ctx  - error handler context
29e5c89e4eSSatish Balay 
30e5c89e4eSSatish Balay   Level: developer
31e5c89e4eSSatish Balay 
32811af0c4SBarry Smith   Note:
33811af0c4SBarry Smith   Users do not directly call this routine
34e5c89e4eSSatish Balay 
35db781477SPatrick Sanan .seealso: `PetscReturnErrorHandler()`
36e5c89e4eSSatish Balay  @*/
37d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscIgnoreErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, void *ctx)
38d71ae5a4SJacob Faibussowitsch {
3910450e9eSJacob Faibussowitsch   (void)comm;
4010450e9eSJacob Faibussowitsch   (void)line;
4110450e9eSJacob Faibussowitsch   (void)fun;
4210450e9eSJacob Faibussowitsch   (void)file;
4310450e9eSJacob Faibussowitsch   (void)p;
4410450e9eSJacob Faibussowitsch   (void)mess;
4510450e9eSJacob Faibussowitsch   (void)ctx;
4611cc89d2SBarry Smith   return n;
47e5c89e4eSSatish Balay }
48e5c89e4eSSatish Balay 
49107894f0SSatish Balay /* ---------------------------------------------------------------------------------------*/
50107894f0SSatish Balay 
515abee1b0SJed Brown static char      arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
52ace3abfcSBarry Smith static PetscBool PetscErrorPrintfInitializeCalled = PETSC_FALSE;
533f6e4ae9SSatish Balay static char      version[256];
54107894f0SSatish Balay 
55107894f0SSatish Balay /*
56107894f0SSatish Balay    Initializes arch, hostname, username, date so that system calls do NOT need
57107894f0SSatish Balay    to be made during the error handler.
58107894f0SSatish Balay */
59d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscErrorPrintfInitialize(void)
60d71ae5a4SJacob Faibussowitsch {
61ace3abfcSBarry Smith   PetscBool use_stdout = PETSC_FALSE, use_none = PETSC_FALSE;
62107894f0SSatish Balay 
63107894f0SSatish Balay   PetscFunctionBegin;
649566063dSJacob Faibussowitsch   PetscCall(PetscGetArchType(arch, sizeof(arch)));
659566063dSJacob Faibussowitsch   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
669566063dSJacob Faibussowitsch   PetscCall(PetscGetUserName(username, sizeof(username)));
679566063dSJacob Faibussowitsch   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
689566063dSJacob Faibussowitsch   PetscCall(PetscGetDate(date, sizeof(date)));
699566063dSJacob Faibussowitsch   PetscCall(PetscGetVersion(version, sizeof(version)));
70e8fb0fc0SBarry Smith 
719566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_stdout", &use_stdout, NULL));
72a297a907SKarl Rupp   if (use_stdout) PETSC_STDERR = PETSC_STDOUT;
739566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_none", &use_none, NULL));
74a297a907SKarl Rupp   if (use_none) PetscErrorPrintf = PetscErrorPrintfNone;
75107894f0SSatish Balay   PetscErrorPrintfInitializeCalled = PETSC_TRUE;
763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
77107894f0SSatish Balay }
78107894f0SSatish Balay 
79d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscErrorPrintfNone(const char format[], ...)
80d71ae5a4SJacob Faibussowitsch {
813ba16761SJacob Faibussowitsch   return PETSC_SUCCESS;
82e8fb0fc0SBarry Smith }
83e8fb0fc0SBarry Smith 
84d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscErrorPrintfDefault(const char format[], ...)
85d71ae5a4SJacob Faibussowitsch {
86e8fb0fc0SBarry Smith   va_list          Argp;
87ace3abfcSBarry Smith   static PetscBool PetscErrorPrintfCalled = PETSC_FALSE;
883ba16761SJacob Faibussowitsch   PetscErrorCode   ierr;
89e8fb0fc0SBarry Smith 
90e8fb0fc0SBarry Smith   /*
91e8fb0fc0SBarry Smith       This function does not call PetscFunctionBegin and PetscFunctionReturn() because
92e8fb0fc0SBarry Smith     it may be called by PetscStackView().
93e8fb0fc0SBarry Smith 
94e8fb0fc0SBarry Smith       This function does not do error checking because it is called by the error handlers.
95e8fb0fc0SBarry Smith   */
96e8fb0fc0SBarry Smith 
97e8fb0fc0SBarry Smith   if (!PetscErrorPrintfCalled) {
98e8fb0fc0SBarry Smith     PetscErrorPrintfCalled = PETSC_TRUE;
99e8fb0fc0SBarry Smith 
100e8fb0fc0SBarry Smith     /*
101e8fb0fc0SBarry Smith         On the SGI machines and Cray T3E, if errors are generated  "simultaneously" by
102e8fb0fc0SBarry Smith       different processors, the messages are printed all jumbled up; to try to
103e8fb0fc0SBarry Smith       prevent this we have each processor wait based on their rank
104e8fb0fc0SBarry Smith     */
105e8fb0fc0SBarry Smith #if defined(PETSC_CAN_SLEEP_AFTER_ERROR)
106e8fb0fc0SBarry Smith     {
1073ba16761SJacob Faibussowitsch       PetscMPIInt rank = PetscGlobalRank > 8 ? 8 : PetscGlobalRank;
1083ba16761SJacob Faibussowitsch       ierr             = PetscSleep((PetscReal)rank);
1093ba16761SJacob Faibussowitsch       (void)ierr;
110e8fb0fc0SBarry Smith     }
111e8fb0fc0SBarry Smith #endif
112e8fb0fc0SBarry Smith   }
113e8fb0fc0SBarry Smith 
1143ba16761SJacob Faibussowitsch   ierr = PetscFPrintf(PETSC_COMM_SELF, PETSC_STDERR, "[%d]PETSC ERROR: ", PetscGlobalRank);
115e8fb0fc0SBarry Smith   va_start(Argp, format);
1163ba16761SJacob Faibussowitsch   ierr = (*PetscVFPrintf)(PETSC_STDERR, format, Argp);
1173ba16761SJacob Faibussowitsch   (void)ierr;
118e8fb0fc0SBarry Smith   va_end(Argp);
1193ba16761SJacob Faibussowitsch   return PETSC_SUCCESS;
120e8fb0fc0SBarry Smith }
121e8fb0fc0SBarry Smith 
/*
   Switches the terminal to the highlighted (bold red) rendition for error banners.

   On some systems, when stderr is nested through several levels of shell script
   before being passed to a file, isatty() falsely returns true and the highlight
   escape sequences leak into the test harness output. Therefore no highlighting is
   done when PETSC_STDERR is PETSC_STDOUT.
*/
static void PetscErrorPrintfHilight(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  /* only decorate output produced by the default printer */
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (!isatty(fileno(PETSC_STDERR))) return;
  fprintf(PETSC_STDERR, "\033[1;31m");
#endif
}
136114011d0SBarry Smith 
/*
   Emits the escape sequences that reset foreground and background to their defaults;
   uses the same conditions as PetscErrorPrintfHilight() (default printer, PETSC_STDERR
   distinct from PETSC_STDOUT, and a tty).
*/
static void PetscErrorPrintfNormal(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (!isatty(fileno(PETSC_STDERR))) return;
  fprintf(PETSC_STDERR, "\033[0;39m\033[0;49m");
#endif
}
145114011d0SBarry Smith 
14695c0884eSLisandro Dalcin PETSC_EXTERN PetscErrorCode PetscOptionsViewError(void);
147114011d0SBarry Smith 
14835f00c14SToby Isaac static PETSC_TLS PetscBool petsc_traceback_error_silent = PETSC_FALSE;
14935f00c14SToby Isaac 
150e5c89e4eSSatish Balay /*@C
151e5c89e4eSSatish Balay 
152e5c89e4eSSatish Balay   PetscTraceBackErrorHandler - Default error handler routine that generates
153e5c89e4eSSatish Balay   a traceback on error detection.
154e5c89e4eSSatish Balay 
155*cc4c1da9SBarry Smith   Not Collective, No Fortran Support
156e5c89e4eSSatish Balay 
157e5c89e4eSSatish Balay   Input Parameters:
158e32f2f54SBarry Smith + comm - communicator over which error occurred
1596e25c4a1SBarry Smith . line - the line number of the error (usually indicated by `__LINE__` in the calling routine)
16010450e9eSJacob Faibussowitsch . fun  - the function name
1616e25c4a1SBarry Smith . file - the file in which the error was detected (usually indicated by `__FILE__` in the calling routine)
162e5c89e4eSSatish Balay . mess - an error text string, usually just printed to the screen
163e5c89e4eSSatish Balay . n    - the generic error number
164811af0c4SBarry Smith . p    - `PETSC_ERROR_INITIAL` if this is the first call the error handler, otherwise `PETSC_ERROR_REPEAT`
165e5c89e4eSSatish Balay - ctx  - error handler context
166e5c89e4eSSatish Balay 
167811af0c4SBarry Smith   Options Database Keys:
1686e25c4a1SBarry Smith + -error_output_stdout - output the error messages to `stdout` instead of the default `stderr`
16945b666d6SBarry Smith - -error_output_none   - do not output the error messages
170e5c89e4eSSatish Balay 
171e5c89e4eSSatish Balay   Notes:
172811af0c4SBarry Smith   Users do not directly call this routine
173e5c89e4eSSatish Balay 
174811af0c4SBarry Smith   Use `PetscPushErrorHandler()` to set the desired error handler.
175e5c89e4eSSatish Balay 
17645b666d6SBarry Smith   Level: developer
177e5c89e4eSSatish Balay 
178db781477SPatrick Sanan .seealso: `PetscError()`, `PetscPushErrorHandler()`, `PetscPopErrorHandler()`, `PetscAttachDebuggerErrorHandler()`,
1796e25c4a1SBarry Smith           `PetscAbortErrorHandler()`, `PetscMPIAbortErrorHandler()`, `PetscReturnErrorHandler()`, `PetscEmacsClientErrorHandler()`,
1806e25c4a1SBarry Smith            `PETSC_ERROR_INITIAL`, `PETSC_ERROR_REPEAT`, `PetscErrorCode`, `PetscErrorType`
181e5c89e4eSSatish Balay  @*/
182d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscTraceBackErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, void *ctx)
183d71ae5a4SJacob Faibussowitsch {
1843ba16761SJacob Faibussowitsch   PetscErrorCode ierr;
185997adca8SBarry Smith   PetscMPIInt    rank = 0;
186e5c89e4eSSatish Balay 
18710450e9eSJacob Faibussowitsch   (void)ctx;
188a297a907SKarl Rupp   if (comm != PETSC_COMM_SELF) MPI_Comm_rank(comm, &rank);
189a297a907SKarl Rupp 
19035f00c14SToby Isaac   // reinitialize the error handler when a new initializing error is detected
19135f00c14SToby Isaac   if (p != PETSC_ERROR_REPEAT) {
19235f00c14SToby Isaac     petsc_traceback_error_silent = PETSC_FALSE;
19335f00c14SToby Isaac     if (PetscCIEnabledPortableErrorOutput) {
19435f00c14SToby Isaac       PetscMPIInt size = 1;
19535f00c14SToby Isaac 
19635f00c14SToby Isaac       if (comm != MPI_COMM_NULL) MPI_Comm_size(comm, &size);
19735f00c14SToby Isaac       petscabortmpifinalize = (size == PetscGlobalSize) ? PETSC_TRUE : PETSC_FALSE;
19835f00c14SToby Isaac     }
19935f00c14SToby Isaac   }
20035f00c14SToby Isaac 
20135f00c14SToby Isaac   if (rank == 0 && (!PetscCIEnabledPortableErrorOutput || PetscGlobalRank == 0) && (p != PETSC_ERROR_REPEAT || !petsc_traceback_error_silent)) {
202114011d0SBarry Smith     static int cnt = 1;
203114011d0SBarry Smith 
2044e29e845SStefano Zampini     if (p == PETSC_ERROR_INITIAL) {
205114011d0SBarry Smith       PetscErrorPrintfHilight();
2063ba16761SJacob Faibussowitsch       ierr = (*PetscErrorPrintf)("--------------------- Error Message --------------------------------------------------------------\n");
2075f3a2c8dSBarry Smith       PetscErrorPrintfNormal();
2084e29e845SStefano Zampini       if (cnt > 1) {
2099beb8f72SToby Isaac         ierr = (*PetscErrorPrintf)("  It appears a new error in the code was triggered after a previous error, possibly because:\n");
2109beb8f72SToby Isaac         ierr = (*PetscErrorPrintf)("  -  The first error was not properly handled via (for example) the use of\n");
2119beb8f72SToby Isaac         ierr = (*PetscErrorPrintf)("     PetscCall(TheFunctionThatErrors()); or\n");
2129beb8f72SToby Isaac         ierr = (*PetscErrorPrintf)("  -  The second error was triggered while handling the first error.\n");
2139beb8f72SToby Isaac         ierr = (*PetscErrorPrintf)("  Above is the traceback for the previous unhandled error, below the traceback for the next error\n");
2144e29e845SStefano Zampini         ierr = (*PetscErrorPrintf)("  ALL ERRORS in the PETSc libraries are fatal, you should add the appropriate error checking to the code\n");
2154e29e845SStefano Zampini         cnt  = 1;
2164e29e845SStefano Zampini       }
2174e29e845SStefano Zampini     }
2184e29e845SStefano Zampini     if (cnt == 1) {
2199beb8f72SToby Isaac       if (n == PETSC_ERR_MEM || n == PETSC_ERR_MEM_LEAK) ierr = PetscErrorMemoryMessage(n);
220a297a907SKarl Rupp       else {
221e5c89e4eSSatish Balay         const char *text;
2223ba16761SJacob Faibussowitsch         ierr = PetscErrorMessage(n, &text, NULL);
2233ba16761SJacob Faibussowitsch         if (text) ierr = (*PetscErrorPrintf)("%s\n", text);
224e5c89e4eSSatish Balay       }
2253ba16761SJacob Faibussowitsch       if (mess) ierr = (*PetscErrorPrintf)("%s\n", mess);
226456dbbcdSJunchao Zhang #if defined(PETSC_HAVE_CUDA_MIN_ARCH)
227456dbbcdSJunchao Zhang       int confCudaArch = PETSC_HAVE_CUDA_MIN_ARCH;   // if PETSc was configured with numbered CUDA arches, get the min arch.
228456dbbcdSJunchao Zhang       int runCudaArch  = PetscDeviceCUPMRuntimeArch; // 0 indicates the code has never initialized a cuda device.
229456dbbcdSJunchao Zhang       if (runCudaArch && confCudaArch > runCudaArch) {
230456dbbcdSJunchao Zhang         ierr = (*PetscErrorPrintf)("WARNING! Run on a CUDA device with GPU architecture %d, but PETSc was configured with a minimal GPU architecture %d.\n", runCudaArch, confCudaArch);
231456dbbcdSJunchao Zhang         ierr = (*PetscErrorPrintf)("If it is a cudaErrorNoKernelImageForDevice error, you may need to reconfigure PETSc with --with-cuda-arch=%d or --with-cuda-arch=%d,%d\n", runCudaArch, runCudaArch, confCudaArch);
232456dbbcdSJunchao Zhang       }
233456dbbcdSJunchao Zhang #endif
2343ba16761SJacob Faibussowitsch       ierr = PetscOptionsLeftError();
2353ba16761SJacob Faibussowitsch       ierr = (*PetscErrorPrintf)("See https://petsc.org/release/faq/ for trouble shooting.\n");
236660278c0SBarry Smith       if (!PetscCIEnabledPortableErrorOutput) {
2373ba16761SJacob Faibussowitsch         ierr = (*PetscErrorPrintf)("%s\n", version);
2383ba16761SJacob Faibussowitsch         if (PetscErrorPrintfInitializeCalled) ierr = (*PetscErrorPrintf)("%s on a %s named %s by %s %s\n", pname, arch, hostname, username, date);
2393ba16761SJacob Faibussowitsch         ierr = (*PetscErrorPrintf)("Configure options %s\n", petscconfigureoptions);
240107894f0SSatish Balay       }
241660278c0SBarry Smith     }
242997adca8SBarry Smith     /* print line of stack trace */
2433ba16761SJacob Faibussowitsch     if (fun) ierr = (*PetscErrorPrintf)("#%d %s() at %s:%d\n", cnt++, fun, PetscCIFilename(file), PetscCILinenumber(line));
2443ba16761SJacob Faibussowitsch     else if (file) ierr = (*PetscErrorPrintf)("#%d %s:%d\n", cnt++, PetscCIFilename(file), PetscCILinenumber(line));
24549c86fc7SBarry Smith     if (fun) {
246bbcf679cSJacob Faibussowitsch       PetscBool ismain = PETSC_FALSE;
247bbcf679cSJacob Faibussowitsch 
2483ba16761SJacob Faibussowitsch       ierr = PetscStrncmp(fun, "main", 4, &ismain);
249fbfcfee5SBarry Smith       if (ismain) {
2503ba16761SJacob Faibussowitsch         if ((n <= PETSC_ERR_MIN_VALUE) || (n >= PETSC_ERR_MAX_VALUE)) ierr = (*PetscErrorPrintf)("Reached the main program with an out-of-range error code %d. This should never happen\n", n);
2513ba16761SJacob Faibussowitsch         ierr = PetscOptionsViewError();
252114011d0SBarry Smith         PetscErrorPrintfHilight();
2533ba16761SJacob Faibussowitsch         ierr = (*PetscErrorPrintf)("----------------End of Error Message -------send entire error message to petsc-maint@mcs.anl.gov----------\n");
254114011d0SBarry Smith         PetscErrorPrintfNormal();
255114011d0SBarry Smith       }
25649c86fc7SBarry Smith     }
257997adca8SBarry Smith   } else {
25835f00c14SToby Isaac     // silence this process's stacktrace if it is not the root of an originating error
25935f00c14SToby Isaac     if (p != PETSC_ERROR_REPEAT && rank) petsc_traceback_error_silent = PETSC_TRUE;
26035f00c14SToby Isaac     if (fun) {
26135f00c14SToby Isaac       PetscBool ismain = PETSC_FALSE;
26235f00c14SToby Isaac 
26335f00c14SToby Isaac       ierr = PetscStrncmp(fun, "main", 4, &ismain);
26435f00c14SToby Isaac       if (ismain && petsc_traceback_error_silent) {
26535f00c14SToby Isaac         /* This results from PetscError() being called in main: PETSCABORT()
26635f00c14SToby Isaac            will be called after the error handler.  But this thread is not the
26735f00c14SToby Isaac            root rank of the communicator that initialized the error.  So sleep
26835f00c14SToby Isaac            to allow the root thread to finish its printing.
26935f00c14SToby Isaac 
27035f00c14SToby Isaac            (Unless this is running CI, in which case do not sleep because
27135f00c14SToby Isaac            we expect all processes to call MPI_Finalize() and make a clean
27235f00c14SToby Isaac            exit.) */
27335f00c14SToby Isaac         if (!PetscCIEnabledPortableErrorOutput) ierr = PetscSleep(10.0);
27435f00c14SToby Isaac       }
27535f00c14SToby Isaac     }
276997adca8SBarry Smith   }
2773ba16761SJacob Faibussowitsch   (void)ierr;
278362febeeSStefano Zampini   return n;
279e5c89e4eSSatish Balay }
280