xref: /petsc/src/sys/error/errtrace.c (revision 955c50dbca49d45b485af42b14044928cc8409d7)
1 #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for fileno() */
2 #include <petscsys.h>                    /*I "petscsys.h" I*/
3 #include <petsc/private/petscimpl.h>
4 #include <petscconfiginfo.h>
5 #if defined(PETSC_HAVE_UNISTD_H)
6   #include <unistd.h>
7 #endif
8 #include "err.h"
9 #include <petsc/private/logimpl.h> // PETSC_TLS
10 
11 #if defined(PETSC_HAVE_CUPM)
12   #include <petsc/private/deviceimpl.h>
13 #endif
14 
15 /*@C
16   PetscIgnoreErrorHandler - Deprecated, use `PetscReturnErrorHandler()`. Ignores the error, allows program to continue as if error did not occur
17 
18   Not Collective, No Fortran Support
19 
20   Input Parameters:
21 + comm - communicator over which error occurred
22 . line - the line number of the error (indicated by __LINE__)
23 . fun  - the function name
24 . file - the file in which the error was detected (indicated by __FILE__)
25 . mess - an error text string, usually just printed to the screen
26 . n    - the generic error number
27 . p    - specific error number
28 - ctx  - error handler context
29 
30   Level: developer
31 
32   Note:
33   Users do not directly call this routine
34 
35 .seealso: `PetscReturnErrorHandler()`
36  @*/
37 PetscErrorCode PetscIgnoreErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, PetscCtx ctx)
38 {
39   (void)comm;
40   (void)line;
41   (void)fun;
42   (void)file;
43   (void)p;
44   (void)mess;
45   (void)ctx;
46   return n;
47 }
48 
49 static char      arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
50 static PetscBool PetscErrorPrintfInitializeCalled = PETSC_FALSE;
51 static char      version[256];
52 
53 /*
54    Initializes arch, hostname, username, date so that system calls do NOT need
55    to be made during the error handler.
56 */
57 PetscErrorCode PetscErrorPrintfInitialize(void)
58 {
59   PetscBool use_stdout = PETSC_FALSE, use_none = PETSC_FALSE;
60 
61   PetscFunctionBegin;
62   PetscCall(PetscGetArchType(arch, sizeof(arch)));
63   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
64   PetscCall(PetscGetUserName(username, sizeof(username)));
65   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
66   PetscCall(PetscGetDate(date, sizeof(date)));
67   PetscCall(PetscGetVersion(version, sizeof(version)));
68 
69   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_stdout", &use_stdout, NULL));
70   if (use_stdout) PETSC_STDERR = PETSC_STDOUT;
71   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_none", &use_none, NULL));
72   if (use_none) PetscErrorPrintf = PetscErrorPrintfNone;
73   PetscErrorPrintfInitializeCalled = PETSC_TRUE;
74   PetscFunctionReturn(PETSC_SUCCESS);
75 }
76 
77 PetscErrorCode PetscErrorPrintfNone(const char format[], ...)
78 {
79   return PETSC_SUCCESS;
80 }
81 
82 PetscErrorCode PetscErrorPrintfDefault(const char format[], ...)
83 {
84   va_list          Argp;
85   static PetscBool PetscErrorPrintfCalled = PETSC_FALSE;
86 
87   /*
88       This function does not call PetscFunctionBegin and PetscFunctionReturn() because
89     it may be called by PetscStackView().
90 
91       This function does not do error checking because it is called by the error handlers.
92   */
93 
94   if (!PetscErrorPrintfCalled) {
95     PetscErrorPrintfCalled = PETSC_TRUE;
96 
97     /*
98         On the SGI machines and Cray T3E, if errors are generated  "simultaneously" by
99       different processors, the messages are printed all jumbled up; to try to
100       prevent this we have each processor wait based on their rank
101     */
102 #if defined(PETSC_CAN_SLEEP_AFTER_ERROR)
103     {
104       PetscMPIInt rank = PetscGlobalRank > 8 ? 8 : PetscGlobalRank;
105       (void)PetscSleep((PetscReal)rank);
106     }
107 #endif
108   }
109 
110   (void)PetscFPrintf(PETSC_COMM_SELF, PETSC_STDERR, "[%d]PETSC ERROR: ", PetscGlobalRank);
111   va_start(Argp, format);
112   (void)(*PetscVFPrintf)(PETSC_STDERR, format, Argp);
113   va_end(Argp);
114   return PETSC_SUCCESS;
115 }
116 
/*
   Switches the terminal attached to PETSC_STDERR to bold red (ANSI escape sequence).

   On some systems, when stderr is nested through several levels of shell script before
   being passed to a file, isatty() falsely returns true and the highlight escape sequences
   end up in the test harness output. Therefore nothing is emitted when PETSC_STDERR is
   PETSC_STDOUT, nor when a printer other than PetscErrorPrintfDefault() is installed.

   Compiles to a no-op unless both PETSC_HAVE_UNISTD_H and PETSC_USE_ISATTY are defined.
*/
static void PetscErrorPrintfHilight(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (!isatty(fileno(PETSC_STDERR))) return;
  fprintf(PETSC_STDERR, "\033[1;31m");
#endif
}
131 
/*
   Restores the default foreground and background colors on the terminal attached to
   PETSC_STDERR (ANSI escape sequences). Nothing is emitted when a printer other than
   PetscErrorPrintfDefault() is installed, when PETSC_STDERR is PETSC_STDOUT, or when
   PETSC_STDERR is not a terminal.

   Compiles to a no-op unless both PETSC_HAVE_UNISTD_H and PETSC_USE_ISATTY are defined.
*/
static void PetscErrorPrintfNormal(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (!isatty(fileno(PETSC_STDERR))) return;
  fprintf(PETSC_STDERR, "\033[0;39m\033[0;49m");
#endif
}
140 
141 PETSC_EXTERN PetscErrorCode PetscOptionsViewError(void);
142 
143 static PETSC_TLS PetscBool petsc_traceback_error_silent = PETSC_FALSE;
144 
145 /*@C
146 
147   PetscTraceBackErrorHandler - Default error handler routine that generates
148   a traceback on error detection.
149 
150   Not Collective, No Fortran Support
151 
152   Input Parameters:
153 + comm - communicator over which error occurred
154 . line - the line number of the error (usually indicated by `__LINE__` in the calling routine)
155 . fun  - the function name
156 . file - the file in which the error was detected (usually indicated by `__FILE__` in the calling routine)
157 . mess - an error text string, usually just printed to the screen
158 . n    - the generic error number
159 . p    - `PETSC_ERROR_INITIAL` if this is the first call the error handler, otherwise `PETSC_ERROR_REPEAT`
160 - ctx  - error handler context
161 
162   Options Database Keys:
163 + -error_output_stdout - output the error messages to `stdout` instead of the default `stderr`
164 - -error_output_none   - do not output the error messages
165 
166   Notes:
167   Users do not directly call this routine
168 
169   Use `PetscPushErrorHandler()` to set the desired error handler.
170 
171   Level: developer
172 
173 .seealso: `PetscError()`, `PetscPushErrorHandler()`, `PetscPopErrorHandler()`, `PetscAttachDebuggerErrorHandler()`,
174           `PetscAbortErrorHandler()`, `PetscMPIAbortErrorHandler()`, `PetscReturnErrorHandler()`, `PetscEmacsClientErrorHandler()`,
175            `PETSC_ERROR_INITIAL`, `PETSC_ERROR_REPEAT`, `PetscErrorCode`, `PetscErrorType`
176  @*/
177 PetscErrorCode PetscTraceBackErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, PetscCtx ctx)
178 {
179   PetscMPIInt rank = 0;
180 
181   (void)ctx;
182   if (comm != PETSC_COMM_SELF) MPI_Comm_rank(comm, &rank);
183 
184   // reinitialize the error handler when a new initializing error is detected
185   if (p != PETSC_ERROR_REPEAT) {
186     petsc_traceback_error_silent = PETSC_FALSE;
187     if (PetscCIEnabledPortableErrorOutput) {
188       PetscMPIInt size = 1;
189 
190       if (comm != MPI_COMM_NULL) MPI_Comm_size(comm, &size);
191       petscabortmpifinalize = (size == PetscGlobalSize) ? PETSC_TRUE : PETSC_FALSE;
192     }
193   }
194 
195   if (rank == 0 && (!PetscCIEnabledPortableErrorOutput || PetscGlobalRank == 0) && (p != PETSC_ERROR_REPEAT || !petsc_traceback_error_silent)) {
196     static int cnt    = 1;
197     PetscBool  python = (n == PETSC_ERR_PYTHON && cnt == 1) ? PETSC_TRUE : PETSC_FALSE;
198 
199     if (p == PETSC_ERROR_INITIAL || python) {
200       PetscErrorPrintfHilight();
201       (void)(*PetscErrorPrintf)("--------------------- Error Message --------------------------------------------------------------\n");
202       PetscErrorPrintfNormal();
203       if (cnt > 1) {
204         (void)(*PetscErrorPrintf)("  It appears a new error in the code was triggered after a previous error, possibly because:\n");
205         (void)(*PetscErrorPrintf)("  -  The first error was not properly handled via (for example) the use of\n");
206         (void)(*PetscErrorPrintf)("     PetscCall(TheFunctionThatErrors()); or\n");
207         (void)(*PetscErrorPrintf)("  -  The second error was triggered while handling the first error.\n");
208         (void)(*PetscErrorPrintf)("  Above is the traceback for the previous unhandled error, below the traceback for the next error\n");
209         (void)(*PetscErrorPrintf)("  ALL ERRORS in the PETSc libraries are fatal, you should add the appropriate error checking to the code\n");
210         cnt = 1;
211       }
212     }
213     if (cnt == 1) {
214       if (n == PETSC_ERR_MEM || n == PETSC_ERR_MEM_LEAK) (void)PetscErrorMemoryMessage(n);
215       else {
216         const char *text;
217         (void)PetscErrorMessage(n, &text, NULL);
218         if (text) (void)(*PetscErrorPrintf)("%s\n", text);
219       }
220       if (python) (void)PetscPythonPrintError();
221       else if (mess) (void)(*PetscErrorPrintf)("%s\n", mess);
222 #if defined(PETSC_PKG_CUDA_MIN_ARCH)
223       int confCudaArch = PETSC_PKG_CUDA_MIN_ARCH;    // if PETSc was configured with numbered CUDA arches, get the min arch.
224       int runCudaArch  = PetscDeviceCUPMRuntimeArch; // 0 indicates the code has never initialized a cuda device.
225       if (runCudaArch && confCudaArch > runCudaArch) {
226         (void)(*PetscErrorPrintf)("WARNING! Run on a CUDA device with GPU architecture %d, but PETSc was configured with a minimal GPU architecture %d.\n", runCudaArch, confCudaArch);
227         (void)(*PetscErrorPrintf)("If it is a cudaErrorNoKernelImageForDevice error, you may need to reconfigure PETSc with --with-cuda-arch=%d or --with-cuda-arch=%d,%d\n", runCudaArch, runCudaArch, confCudaArch);
228       }
229 #endif
230       (void)PetscOptionsLeftError();
231       (void)(*PetscErrorPrintf)("See https://petsc.org/release/faq/ for trouble shooting.\n");
232       if (!PetscCIEnabledPortableErrorOutput) {
233         size_t clen;
234 
235         (void)(*PetscErrorPrintf)("%s\n", version);
236         if (PetscErrorPrintfInitializeCalled) (void)(*PetscErrorPrintf)("%s with %d MPI process(es) and PETSC_ARCH %s on %s by %s %s\n", pname, PetscGlobalSize, arch, hostname, username, date);
237         (void)PetscStrlen(petscconfigureoptions, &clen);
238         (void)(*PetscErrorPrintf)("Configure options: %s\n", clen ? petscconfigureoptions : "none used");
239       }
240     }
241     /* print line of stack trace */
242     if (fun) (void)(*PetscErrorPrintf)("#%d %s() at %s:%d\n", cnt++, fun, PetscCIFilename(file), PetscCILinenumber(line));
243     else if (file) (void)(*PetscErrorPrintf)("#%d %s:%d\n", cnt++, PetscCIFilename(file), PetscCILinenumber(line));
244     if (fun) {
245       PetscBool ismain = PETSC_FALSE;
246 
247       (void)PetscStrncmp(fun, "main", 4, &ismain);
248       if (ismain) {
249         if ((n <= PETSC_ERR_MIN_VALUE) || (n >= PETSC_ERR_MAX_VALUE)) (void)(*PetscErrorPrintf)("Reached the main program with an out-of-range error code %d. This should never happen\n", n);
250         (void)PetscOptionsViewError();
251         PetscErrorPrintfHilight();
252         (void)(*PetscErrorPrintf)("----------------End of Error Message -------send entire error message to petsc-maint@mcs.anl.gov----------\n");
253         PetscErrorPrintfNormal();
254       }
255     }
256   } else {
257     // silence this process's stacktrace if it is not the root of an originating error
258     if (p != PETSC_ERROR_REPEAT && rank) petsc_traceback_error_silent = PETSC_TRUE;
259     if (fun) {
260       PetscBool ismain = PETSC_FALSE;
261 
262       (void)PetscStrncmp(fun, "main", 4, &ismain);
263       if (ismain && petsc_traceback_error_silent) {
264         /* This results from PetscError() being called in main: PETSCABORT()
265            will be called after the error handler.  But this thread is not the
266            root rank of the communicator that initialized the error.  So sleep
267            to allow the root thread to finish its printing.
268 
269            (Unless this is running CI, in which case do not sleep because
270            we expect all processes to call MPI_Finalize() and make a clean
271            exit.) */
272         if (!PetscCIEnabledPortableErrorOutput) (void)PetscSleep(10.0);
273       }
274     }
275   }
276   return n;
277 }
278