xref: /petsc/src/sys/error/errtrace.c (revision 607e733f3db3ee7f6f605a13295c517df8dbb9c9)
1 #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for fileno() */
2 #include <petscsys.h>                    /*I "petscsys.h" I*/
3 #include <petsc/private/petscimpl.h>
4 #include <petscconfiginfo.h>
5 #if defined(PETSC_HAVE_UNISTD_H)
6   #include <unistd.h>
7 #endif
8 #include "err.h"
9 #include <petsc/private/logimpl.h> // PETSC_TLS
10 
11 #if defined(PETSC_HAVE_CUPM)
12   #include <petsc/private/deviceimpl.h>
13 #endif
14 
15 /*@C
16   PetscIgnoreErrorHandler - Deprecated, use `PetscReturnErrorHandler()`. Ignores the error, allows program to continue as if error did not occur
17 
18   Not Collective, No Fortran Support
19 
20   Input Parameters:
21 + comm - communicator over which error occurred
22 . line - the line number of the error (indicated by __LINE__)
23 . fun  - the function name
24 . file - the file in which the error was detected (indicated by __FILE__)
25 . mess - an error text string, usually just printed to the screen
26 . n    - the generic error number
27 . p    - specific error number
28 - ctx  - error handler context
29 
30   Level: developer
31 
32   Note:
33   Users do not directly call this routine
34 
35 .seealso: `PetscReturnErrorHandler()`
36  @*/
37 PetscErrorCode PetscIgnoreErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, PetscCtx ctx)
38 {
39   (void)comm;
40   (void)line;
41   (void)fun;
42   (void)file;
43   (void)p;
44   (void)mess;
45   (void)ctx;
46   return n;
47 }
48 
49 /* ---------------------------------------------------------------------------------------*/
50 
51 static char      arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
52 static PetscBool PetscErrorPrintfInitializeCalled = PETSC_FALSE;
53 static char      version[256];
54 
55 /*
56    Initializes arch, hostname, username, date so that system calls do NOT need
57    to be made during the error handler.
58 */
59 PetscErrorCode PetscErrorPrintfInitialize(void)
60 {
61   PetscBool use_stdout = PETSC_FALSE, use_none = PETSC_FALSE;
62 
63   PetscFunctionBegin;
64   PetscCall(PetscGetArchType(arch, sizeof(arch)));
65   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
66   PetscCall(PetscGetUserName(username, sizeof(username)));
67   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
68   PetscCall(PetscGetDate(date, sizeof(date)));
69   PetscCall(PetscGetVersion(version, sizeof(version)));
70 
71   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_stdout", &use_stdout, NULL));
72   if (use_stdout) PETSC_STDERR = PETSC_STDOUT;
73   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_none", &use_none, NULL));
74   if (use_none) PetscErrorPrintf = PetscErrorPrintfNone;
75   PetscErrorPrintfInitializeCalled = PETSC_TRUE;
76   PetscFunctionReturn(PETSC_SUCCESS);
77 }
78 
79 PetscErrorCode PetscErrorPrintfNone(const char format[], ...)
80 {
81   return PETSC_SUCCESS;
82 }
83 
84 PetscErrorCode PetscErrorPrintfDefault(const char format[], ...)
85 {
86   va_list          Argp;
87   static PetscBool PetscErrorPrintfCalled = PETSC_FALSE;
88 
89   /*
90       This function does not call PetscFunctionBegin and PetscFunctionReturn() because
91     it may be called by PetscStackView().
92 
93       This function does not do error checking because it is called by the error handlers.
94   */
95 
96   if (!PetscErrorPrintfCalled) {
97     PetscErrorPrintfCalled = PETSC_TRUE;
98 
99     /*
100         On the SGI machines and Cray T3E, if errors are generated  "simultaneously" by
101       different processors, the messages are printed all jumbled up; to try to
102       prevent this we have each processor wait based on their rank
103     */
104 #if defined(PETSC_CAN_SLEEP_AFTER_ERROR)
105     {
106       PetscMPIInt rank = PetscGlobalRank > 8 ? 8 : PetscGlobalRank;
107       (void)PetscSleep((PetscReal)rank);
108     }
109 #endif
110   }
111 
112   (void)PetscFPrintf(PETSC_COMM_SELF, PETSC_STDERR, "[%d]PETSC ERROR: ", PetscGlobalRank);
113   va_start(Argp, format);
114   (void)(*PetscVFPrintf)(PETSC_STDERR, format, Argp);
115   va_end(Argp);
116   return PETSC_SUCCESS;
117 }
118 
/*
   Switches terminal output on PETSC_STDERR to bold red (escape sequence ESC[1;31m).

   On some systems when the stderr is nested through several levels of shell script
   before being passed to a file the isatty() falsely returns true resulting in
   the screen highlight variables being passed through the test harness. Therefore
   nothing is emitted when PETSC_STDERR is PETSC_STDOUT, nor when the default
   error printer has been replaced.
*/
static void PetscErrorPrintfHilight(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  /* only decorate output produced by the default printer */
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  /* errors redirected to stdout are never highlighted */
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (isatty(fileno(PETSC_STDERR))) fputs("\033[1;31m", PETSC_STDERR);
#endif
}
133 
/*
   Resets terminal colors on PETSC_STDERR (escape sequences ESC[0;39m ESC[0;49m).
   Emits nothing unless the default error printer is active, PETSC_STDERR differs
   from PETSC_STDOUT, and isatty() reports a terminal.
*/
static void PetscErrorPrintfNormal(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  /* only decorate output produced by the default printer */
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  /* errors redirected to stdout are never decorated */
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (isatty(fileno(PETSC_STDERR))) fputs("\033[0;39m\033[0;49m", PETSC_STDERR);
#endif
}
142 
143 PETSC_EXTERN PetscErrorCode PetscOptionsViewError(void);
144 
145 static PETSC_TLS PetscBool petsc_traceback_error_silent = PETSC_FALSE;
146 
147 /*@C
148 
149   PetscTraceBackErrorHandler - Default error handler routine that generates
150   a traceback on error detection.
151 
152   Not Collective, No Fortran Support
153 
154   Input Parameters:
155 + comm - communicator over which error occurred
156 . line - the line number of the error (usually indicated by `__LINE__` in the calling routine)
157 . fun  - the function name
158 . file - the file in which the error was detected (usually indicated by `__FILE__` in the calling routine)
159 . mess - an error text string, usually just printed to the screen
160 . n    - the generic error number
161 . p    - `PETSC_ERROR_INITIAL` if this is the first call the error handler, otherwise `PETSC_ERROR_REPEAT`
162 - ctx  - error handler context
163 
164   Options Database Keys:
165 + -error_output_stdout - output the error messages to `stdout` instead of the default `stderr`
166 - -error_output_none   - do not output the error messages
167 
168   Notes:
169   Users do not directly call this routine
170 
171   Use `PetscPushErrorHandler()` to set the desired error handler.
172 
173   Level: developer
174 
175 .seealso: `PetscError()`, `PetscPushErrorHandler()`, `PetscPopErrorHandler()`, `PetscAttachDebuggerErrorHandler()`,
176           `PetscAbortErrorHandler()`, `PetscMPIAbortErrorHandler()`, `PetscReturnErrorHandler()`, `PetscEmacsClientErrorHandler()`,
177            `PETSC_ERROR_INITIAL`, `PETSC_ERROR_REPEAT`, `PetscErrorCode`, `PetscErrorType`
178  @*/
179 PetscErrorCode PetscTraceBackErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, PetscCtx ctx)
180 {
181   PetscMPIInt rank = 0;
182 
183   (void)ctx;
184   if (comm != PETSC_COMM_SELF) MPI_Comm_rank(comm, &rank);
185 
186   // reinitialize the error handler when a new initializing error is detected
187   if (p != PETSC_ERROR_REPEAT) {
188     petsc_traceback_error_silent = PETSC_FALSE;
189     if (PetscCIEnabledPortableErrorOutput) {
190       PetscMPIInt size = 1;
191 
192       if (comm != MPI_COMM_NULL) MPI_Comm_size(comm, &size);
193       petscabortmpifinalize = (size == PetscGlobalSize) ? PETSC_TRUE : PETSC_FALSE;
194     }
195   }
196 
197   if (rank == 0 && (!PetscCIEnabledPortableErrorOutput || PetscGlobalRank == 0) && (p != PETSC_ERROR_REPEAT || !petsc_traceback_error_silent)) {
198     static int cnt    = 1;
199     PetscBool  python = (n == PETSC_ERR_PYTHON && cnt == 1) ? PETSC_TRUE : PETSC_FALSE;
200 
201     if (p == PETSC_ERROR_INITIAL || python) {
202       PetscErrorPrintfHilight();
203       (void)(*PetscErrorPrintf)("--------------------- Error Message --------------------------------------------------------------\n");
204       PetscErrorPrintfNormal();
205       if (cnt > 1) {
206         (void)(*PetscErrorPrintf)("  It appears a new error in the code was triggered after a previous error, possibly because:\n");
207         (void)(*PetscErrorPrintf)("  -  The first error was not properly handled via (for example) the use of\n");
208         (void)(*PetscErrorPrintf)("     PetscCall(TheFunctionThatErrors()); or\n");
209         (void)(*PetscErrorPrintf)("  -  The second error was triggered while handling the first error.\n");
210         (void)(*PetscErrorPrintf)("  Above is the traceback for the previous unhandled error, below the traceback for the next error\n");
211         (void)(*PetscErrorPrintf)("  ALL ERRORS in the PETSc libraries are fatal, you should add the appropriate error checking to the code\n");
212         cnt = 1;
213       }
214     }
215     if (cnt == 1) {
216       if (n == PETSC_ERR_MEM || n == PETSC_ERR_MEM_LEAK) (void)PetscErrorMemoryMessage(n);
217       else {
218         const char *text;
219         (void)PetscErrorMessage(n, &text, NULL);
220         if (text) (void)(*PetscErrorPrintf)("%s\n", text);
221       }
222       if (python) (void)PetscPythonPrintError();
223       else if (mess) (void)(*PetscErrorPrintf)("%s\n", mess);
224 #if defined(PETSC_PKG_CUDA_MIN_ARCH)
225       int confCudaArch = PETSC_PKG_CUDA_MIN_ARCH;    // if PETSc was configured with numbered CUDA arches, get the min arch.
226       int runCudaArch  = PetscDeviceCUPMRuntimeArch; // 0 indicates the code has never initialized a cuda device.
227       if (runCudaArch && confCudaArch > runCudaArch) {
228         (void)(*PetscErrorPrintf)("WARNING! Run on a CUDA device with GPU architecture %d, but PETSc was configured with a minimal GPU architecture %d.\n", runCudaArch, confCudaArch);
229         (void)(*PetscErrorPrintf)("If it is a cudaErrorNoKernelImageForDevice error, you may need to reconfigure PETSc with --with-cuda-arch=%d or --with-cuda-arch=%d,%d\n", runCudaArch, runCudaArch, confCudaArch);
230       }
231 #endif
232       (void)PetscOptionsLeftError();
233       (void)(*PetscErrorPrintf)("See https://petsc.org/release/faq/ for trouble shooting.\n");
234       if (!PetscCIEnabledPortableErrorOutput) {
235         size_t clen;
236 
237         (void)(*PetscErrorPrintf)("%s\n", version);
238         if (PetscErrorPrintfInitializeCalled) (void)(*PetscErrorPrintf)("%s with %d MPI process(es) and PETSC_ARCH %s on %s by %s %s\n", pname, PetscGlobalSize, arch, hostname, username, date);
239         (void)PetscStrlen(petscconfigureoptions, &clen);
240         (void)(*PetscErrorPrintf)("Configure options: %s\n", clen ? petscconfigureoptions : "none used");
241       }
242     }
243     /* print line of stack trace */
244     if (fun) (void)(*PetscErrorPrintf)("#%d %s() at %s:%d\n", cnt++, fun, PetscCIFilename(file), PetscCILinenumber(line));
245     else if (file) (void)(*PetscErrorPrintf)("#%d %s:%d\n", cnt++, PetscCIFilename(file), PetscCILinenumber(line));
246     if (fun) {
247       PetscBool ismain = PETSC_FALSE;
248 
249       (void)PetscStrncmp(fun, "main", 4, &ismain);
250       if (ismain) {
251         if ((n <= PETSC_ERR_MIN_VALUE) || (n >= PETSC_ERR_MAX_VALUE)) (void)(*PetscErrorPrintf)("Reached the main program with an out-of-range error code %d. This should never happen\n", n);
252         (void)PetscOptionsViewError();
253         PetscErrorPrintfHilight();
254         (void)(*PetscErrorPrintf)("----------------End of Error Message -------send entire error message to petsc-maint@mcs.anl.gov----------\n");
255         PetscErrorPrintfNormal();
256       }
257     }
258   } else {
259     // silence this process's stacktrace if it is not the root of an originating error
260     if (p != PETSC_ERROR_REPEAT && rank) petsc_traceback_error_silent = PETSC_TRUE;
261     if (fun) {
262       PetscBool ismain = PETSC_FALSE;
263 
264       (void)PetscStrncmp(fun, "main", 4, &ismain);
265       if (ismain && petsc_traceback_error_silent) {
266         /* This results from PetscError() being called in main: PETSCABORT()
267            will be called after the error handler.  But this thread is not the
268            root rank of the communicator that initialized the error.  So sleep
269            to allow the root thread to finish its printing.
270 
271            (Unless this is running CI, in which case do not sleep because
272            we expect all processes to call MPI_Finalize() and make a clean
273            exit.) */
274         if (!PetscCIEnabledPortableErrorOutput) (void)PetscSleep(10.0);
275       }
276     }
277   }
278   return n;
279 }
280