xref: /petsc/src/sys/error/errtrace.c (revision 98d129c30f3ee9fdddc40fdbc5a989b7be64f888)
1 #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for fileno() */
2 #include <petscsys.h>                    /*I "petscsys.h" I*/
3 #include <petsc/private/petscimpl.h>
4 #include <petscconfiginfo.h>
5 #if defined(PETSC_HAVE_UNISTD_H)
6   #include <unistd.h>
7 #endif
8 #include "err.h"
9 #include <petsc/private/logimpl.h> // PETSC_TLS
10 
11 #if defined(PETSC_HAVE_CUPM)
12   #include <petsc/private/deviceimpl.h>
13 #endif
14 
15 /*@C
16   PetscIgnoreErrorHandler - Deprecated, use `PetscReturnErrorHandler()`. Ignores the error, allows program to continue as if error did not occur
17 
18   Not Collective
19 
20   Input Parameters:
21 + comm - communicator over which error occurred
22 . line - the line number of the error (indicated by __LINE__)
23 . fun  - the function name
24 . file - the file in which the error was detected (indicated by __FILE__)
25 . mess - an error text string, usually just printed to the screen
26 . n    - the generic error number
27 . p    - specific error number
28 - ctx  - error handler context
29 
30   Level: developer
31 
32   Note:
33   Users do not directly call this routine
34 
35 .seealso: `PetscReturnErrorHandler()`
36  @*/
37 PetscErrorCode PetscIgnoreErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, void *ctx)
38 {
39   (void)comm;
40   (void)line;
41   (void)fun;
42   (void)file;
43   (void)p;
44   (void)mess;
45   (void)ctx;
46   return n;
47 }
48 
49 /* ---------------------------------------------------------------------------------------*/
50 
51 static char      arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
52 static PetscBool PetscErrorPrintfInitializeCalled = PETSC_FALSE;
53 static char      version[256];
54 
55 /*
56    Initializes arch, hostname, username, date so that system calls do NOT need
57    to be made during the error handler.
58 */
59 PetscErrorCode PetscErrorPrintfInitialize(void)
60 {
61   PetscBool use_stdout = PETSC_FALSE, use_none = PETSC_FALSE;
62 
63   PetscFunctionBegin;
64   PetscCall(PetscGetArchType(arch, sizeof(arch)));
65   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
66   PetscCall(PetscGetUserName(username, sizeof(username)));
67   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
68   PetscCall(PetscGetDate(date, sizeof(date)));
69   PetscCall(PetscGetVersion(version, sizeof(version)));
70 
71   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_stdout", &use_stdout, NULL));
72   if (use_stdout) PETSC_STDERR = PETSC_STDOUT;
73   PetscCall(PetscOptionsGetBool(NULL, NULL, "-error_output_none", &use_none, NULL));
74   if (use_none) PetscErrorPrintf = PetscErrorPrintfNone;
75   PetscErrorPrintfInitializeCalled = PETSC_TRUE;
76   PetscFunctionReturn(PETSC_SUCCESS);
77 }
78 
79 PetscErrorCode PetscErrorPrintfNone(const char format[], ...)
80 {
81   return PETSC_SUCCESS;
82 }
83 
84 PetscErrorCode PetscErrorPrintfDefault(const char format[], ...)
85 {
86   va_list          Argp;
87   static PetscBool PetscErrorPrintfCalled = PETSC_FALSE;
88   PetscErrorCode   ierr;
89 
90   /*
91       This function does not call PetscFunctionBegin and PetscFunctionReturn() because
92     it may be called by PetscStackView().
93 
94       This function does not do error checking because it is called by the error handlers.
95   */
96 
97   if (!PetscErrorPrintfCalled) {
98     PetscErrorPrintfCalled = PETSC_TRUE;
99 
100     /*
101         On the SGI machines and Cray T3E, if errors are generated  "simultaneously" by
102       different processors, the messages are printed all jumbled up; to try to
103       prevent this we have each processor wait based on their rank
104     */
105 #if defined(PETSC_CAN_SLEEP_AFTER_ERROR)
106     {
107       PetscMPIInt rank = PetscGlobalRank > 8 ? 8 : PetscGlobalRank;
108       ierr             = PetscSleep((PetscReal)rank);
109       (void)ierr;
110     }
111 #endif
112   }
113 
114   ierr = PetscFPrintf(PETSC_COMM_SELF, PETSC_STDERR, "[%d]PETSC ERROR: ", PetscGlobalRank);
115   va_start(Argp, format);
116   ierr = (*PetscVFPrintf)(PETSC_STDERR, format, Argp);
117   (void)ierr;
118   va_end(Argp);
119   return PETSC_SUCCESS;
120 }
121 
/*
   Switches the terminal to the highlight colour used for the error banner.

   On some systems when the stderr is nested through several levels of shell script
   before being passed to a file the isatty() falsely returns true resulting in
   the screen highlight variables being passed through the test harness. Therefore
   simply do not highlight when the PETSC_STDERR is PETSC_STDOUT.

   Nothing is emitted either when a non-default PetscErrorPrintf is installed, or
   when PETSC_HAVE_UNISTD_H and PETSC_USE_ISATTY are not both defined.
*/
static void PetscErrorPrintfHilight(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (!isatty(fileno(PETSC_STDERR))) return;
  fprintf(PETSC_STDERR, "\033[1;31m");
#endif
}
136 
/*
   Emits the escape sequences that reset the terminal colours after
   PetscErrorPrintfHilight(). The same conditions apply: the default
   PetscErrorPrintf must be installed, PETSC_STDERR must differ from PETSC_STDOUT,
   and PETSC_STDERR must be a terminal according to isatty().
*/
static void PetscErrorPrintfNormal(void)
{
#if defined(PETSC_HAVE_UNISTD_H) && defined(PETSC_USE_ISATTY)
  if (PetscErrorPrintf != PetscErrorPrintfDefault) return;
  if (PETSC_STDERR == PETSC_STDOUT) return;
  if (!isatty(fileno(PETSC_STDERR))) return;
  fprintf(PETSC_STDERR, "\033[0;39m\033[0;49m");
#endif
}
145 
146 PETSC_EXTERN PetscErrorCode PetscOptionsViewError(void);
147 
148 static PETSC_TLS PetscBool petsc_traceback_error_silent = PETSC_FALSE;
149 
150 /*@C
151 
152   PetscTraceBackErrorHandler - Default error handler routine that generates
153   a traceback on error detection.
154 
155   Not Collective
156 
157   Input Parameters:
158 + comm - communicator over which error occurred
159 . line - the line number of the error (usually indicated by `__LINE__` in the calling routine)
160 . fun  - the function name
161 . file - the file in which the error was detected (usually indicated by `__FILE__` in the calling routine)
162 . mess - an error text string, usually just printed to the screen
163 . n    - the generic error number
164 . p    - `PETSC_ERROR_INITIAL` if this is the first call the error handler, otherwise `PETSC_ERROR_REPEAT`
165 - ctx  - error handler context
166 
167   Options Database Keys:
168 + -error_output_stdout - output the error messages to `stdout` instead of the default `stderr`
169 - -error_output_none   - do not output the error messages
170 
171   Notes:
172   Users do not directly call this routine
173 
174   Use `PetscPushErrorHandler()` to set the desired error handler.
175 
176   Level: developer
177 
178 .seealso: `PetscError()`, `PetscPushErrorHandler()`, `PetscPopErrorHandler()`, `PetscAttachDebuggerErrorHandler()`,
179           `PetscAbortErrorHandler()`, `PetscMPIAbortErrorHandler()`, `PetscReturnErrorHandler()`, `PetscEmacsClientErrorHandler()`,
180            `PETSC_ERROR_INITIAL`, `PETSC_ERROR_REPEAT`, `PetscErrorCode`, `PetscErrorType`
181  @*/
182 PetscErrorCode PetscTraceBackErrorHandler(MPI_Comm comm, int line, const char *fun, const char *file, PetscErrorCode n, PetscErrorType p, const char *mess, void *ctx)
183 {
184   PetscErrorCode ierr;
185   PetscMPIInt    rank = 0;
186 
187   (void)ctx;
188   if (comm != PETSC_COMM_SELF) MPI_Comm_rank(comm, &rank);
189 
190   // reinitialize the error handler when a new initializing error is detected
191   if (p != PETSC_ERROR_REPEAT) {
192     petsc_traceback_error_silent = PETSC_FALSE;
193     if (PetscCIEnabledPortableErrorOutput) {
194       PetscMPIInt size = 1;
195 
196       if (comm != MPI_COMM_NULL) MPI_Comm_size(comm, &size);
197       petscabortmpifinalize = (size == PetscGlobalSize) ? PETSC_TRUE : PETSC_FALSE;
198     }
199   }
200 
201   if (rank == 0 && (!PetscCIEnabledPortableErrorOutput || PetscGlobalRank == 0) && (p != PETSC_ERROR_REPEAT || !petsc_traceback_error_silent)) {
202     static int cnt = 1;
203 
204     if (p == PETSC_ERROR_INITIAL) {
205       PetscErrorPrintfHilight();
206       ierr = (*PetscErrorPrintf)("--------------------- Error Message --------------------------------------------------------------\n");
207       PetscErrorPrintfNormal();
208       if (cnt > 1) {
209         ierr = (*PetscErrorPrintf)("  It appears a new error in the code was triggered after a previous error, possibly because:\n");
210         ierr = (*PetscErrorPrintf)("  -  The first error was not properly handled via (for example) the use of\n");
211         ierr = (*PetscErrorPrintf)("     PetscCall(TheFunctionThatErrors()); or\n");
212         ierr = (*PetscErrorPrintf)("  -  The second error was triggered while handling the first error.\n");
213         ierr = (*PetscErrorPrintf)("  Above is the traceback for the previous unhandled error, below the traceback for the next error\n");
214         ierr = (*PetscErrorPrintf)("  ALL ERRORS in the PETSc libraries are fatal, you should add the appropriate error checking to the code\n");
215         cnt  = 1;
216       }
217     }
218     if (cnt == 1) {
219       if (n == PETSC_ERR_MEM || n == PETSC_ERR_MEM_LEAK) ierr = PetscErrorMemoryMessage(n);
220       else {
221         const char *text;
222         ierr = PetscErrorMessage(n, &text, NULL);
223         if (text) ierr = (*PetscErrorPrintf)("%s\n", text);
224       }
225       if (mess) ierr = (*PetscErrorPrintf)("%s\n", mess);
226 #if defined(PETSC_HAVE_CUDA_MIN_ARCH)
227       int confCudaArch = PETSC_HAVE_CUDA_MIN_ARCH;   // if PETSc was configured with numbered CUDA arches, get the min arch.
228       int runCudaArch  = PetscDeviceCUPMRuntimeArch; // 0 indicates the code has never initialized a cuda device.
229       if (runCudaArch && confCudaArch > runCudaArch) {
230         ierr = (*PetscErrorPrintf)("WARNING! Run on a CUDA device with GPU architecture %d, but PETSc was configured with a minimal GPU architecture %d.\n", runCudaArch, confCudaArch);
231         ierr = (*PetscErrorPrintf)("If it is a cudaErrorNoKernelImageForDevice error, you may need to reconfigure PETSc with --with-cuda-arch=%d or --with-cuda-arch=%d,%d\n", runCudaArch, runCudaArch, confCudaArch);
232       }
233 #endif
234       ierr = PetscOptionsLeftError();
235       ierr = (*PetscErrorPrintf)("See https://petsc.org/release/faq/ for trouble shooting.\n");
236       if (!PetscCIEnabledPortableErrorOutput) {
237         ierr = (*PetscErrorPrintf)("%s\n", version);
238         if (PetscErrorPrintfInitializeCalled) ierr = (*PetscErrorPrintf)("%s on a %s named %s by %s %s\n", pname, arch, hostname, username, date);
239         ierr = (*PetscErrorPrintf)("Configure options %s\n", petscconfigureoptions);
240       }
241     }
242     /* print line of stack trace */
243     if (fun) ierr = (*PetscErrorPrintf)("#%d %s() at %s:%d\n", cnt++, fun, PetscCIFilename(file), PetscCILinenumber(line));
244     else if (file) ierr = (*PetscErrorPrintf)("#%d %s:%d\n", cnt++, PetscCIFilename(file), PetscCILinenumber(line));
245     if (fun) {
246       PetscBool ismain = PETSC_FALSE;
247 
248       ierr = PetscStrncmp(fun, "main", 4, &ismain);
249       if (ismain) {
250         if ((n <= PETSC_ERR_MIN_VALUE) || (n >= PETSC_ERR_MAX_VALUE)) ierr = (*PetscErrorPrintf)("Reached the main program with an out-of-range error code %d. This should never happen\n", n);
251         ierr = PetscOptionsViewError();
252         PetscErrorPrintfHilight();
253         ierr = (*PetscErrorPrintf)("----------------End of Error Message -------send entire error message to petsc-maint@mcs.anl.gov----------\n");
254         PetscErrorPrintfNormal();
255       }
256     }
257   } else {
258     // silence this process's stacktrace if it is not the root of an originating error
259     if (p != PETSC_ERROR_REPEAT && rank) petsc_traceback_error_silent = PETSC_TRUE;
260     if (fun) {
261       PetscBool ismain = PETSC_FALSE;
262 
263       ierr = PetscStrncmp(fun, "main", 4, &ismain);
264       if (ismain && petsc_traceback_error_silent) {
265         /* This results from PetscError() being called in main: PETSCABORT()
266            will be called after the error handler.  But this thread is not the
267            root rank of the communicator that initialized the error.  So sleep
268            to allow the root thread to finish its printing.
269 
270            (Unless this is running CI, in which case do not sleep because
271            we expect all processes to call MPI_Finalize() and make a clean
272            exit.) */
273         if (!PetscCIEnabledPortableErrorOutput) ierr = PetscSleep(10.0);
274       }
275     }
276   }
277   (void)ierr;
278   return n;
279 }
280