xref: /petsc/src/sys/objects/init.c (revision cdbf8f939cdfb1c797c4b7f2cbbd00be19935363)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #include <petscsys.h>        /*I  "petscsys.h"   I*/
10 
11 #ifndef _GNU_SOURCE
12 #define _GNU_SOURCE
13 #endif
14 #if defined(PETSC_HAVE_SCHED_H)
15 #include <sched.h>
16 #endif
17 #if defined(PETSC_HAVE_PTHREAD_H)
18 #include <pthread.h>
19 #endif
20 
21 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
22 #include <sys/sysinfo.h>
23 #endif
24 #if defined(PETSC_HAVE_UNISTD_H)
25 #include <unistd.h>
26 #endif
27 #if defined(PETSC_HAVE_STDLIB_H)
28 #include <stdlib.h>
29 #endif
30 #if defined(PETSC_HAVE_MALLOC_H)
31 #include <malloc.h>
32 #endif
33 #if defined(PETSC_HAVE_VALGRIND)
34 #include <valgrind/valgrind.h>
35 #endif
36 
37 /* ------------------------Nasty global variables -------------------------------*/
38 /*
39      Indicates if PETSc started up MPI, or it was
40    already started before PETSc was initialized.
41 */
42 PetscBool    PetscBeganMPI         = PETSC_FALSE;
43 PetscBool    PetscInitializeCalled = PETSC_FALSE;
44 PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
45 PetscBool    PetscUseThreadPool    = PETSC_FALSE;
46 PetscBool    PetscThreadGo         = PETSC_TRUE;
47 PetscMPIInt  PetscGlobalRank = -1;
48 PetscMPIInt  PetscGlobalSize = -1;
49 
#if defined(PETSC_HAVE_PTHREADCLASSES)
/*
   State shared by the pthread based thread pools ('true', 'tree', 'main',
   'chain') and by the spawn-per-job fallback.
*/
PetscMPIInt       PetscMaxThreads = 2;   /* default; may be overridden with -thread_max */
pthread_t         *PetscThreadPoint;

/*
   Barrier support is forced on in this file.  The macro is only defined when
   the configuration has not already defined it: redefining a macro with a
   different replacement list is not permitted by the C standard.
*/
#if !defined(PETSC_HAVE_PTHREAD_BARRIER)
#define PETSC_HAVE_PTHREAD_BARRIER
#endif
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t *BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode    ithreaderr = 0;
int               *pVal;

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
int               *ThreadCoreAffinity;   /* entry i: core number for thread i (defaults to i, see -thread<i>) */

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */

/* Job description for the 'tree' thread pool */
typedef struct {
  pthread_mutex_t **mutexarray;
  pthread_cond_t  **cond1array;
  pthread_cond_t  **cond2array;
  void            *(*pfunc)(void*);   /* function the threads execute */
  void            **pdata;            /* argument(s) handed to pfunc */
  PetscBool       startJob;
  estat           eJobStat;
  PetscBool       **arrThreadStarted;
  PetscBool       **arrThreadReady;
} sjob_tree;
sjob_tree job_tree;

/* Job description for the 'main' thread pool */
typedef struct {
  pthread_mutex_t **mutexarray;
  pthread_cond_t  **cond1array;
  pthread_cond_t  **cond2array;
  void            *(*pfunc)(void*);
  void            **pdata;
  PetscBool       **arrThreadReady;
} sjob_main;
sjob_main job_main;

/* Job description for the 'chain' thread pool */
typedef struct {
  pthread_mutex_t **mutexarray;
  pthread_cond_t  **cond1array;
  pthread_cond_t  **cond2array;
  void            *(*pfunc)(void*);
  void            **pdata;
  PetscBool       startJob;
  estat           eJobStat;
  PetscBool       **arrThreadStarted;
  PetscBool       **arrThreadReady;
} sjob_chain;
sjob_chain job_chain;

#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/* Job description for the 'true' thread pool; field order must match the initializer of job_true */
typedef struct {
  pthread_mutex_t   mutex;
  pthread_cond_t    cond;
  void              *(*pfunc)(void*);
  void              **pdata;
  pthread_barrier_t *pbarr;
  int               iNumJobThreads;
  int               iNumReadyThreads;
  PetscBool         startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
char *arrmutex; /* used by 'chain','main','tree' thread pools */
char *arrcond1; /* used by 'chain','main','tree' thread pools */
char *arrcond2; /* used by 'chain','main','tree' thread pools */
char *arrstart; /* used by 'chain','main','tree' thread pools */
char *arrready; /* used by 'chain','main','tree' thread pools */

/*
   Function Pointers: bound to one family of the implementations declared
   below when the thread pool type is selected from the options database.
*/
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Thread Pool Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Thread Pool Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Thread Pool Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Thread Pool Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** NO Thread Pool Function  ****/
PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/
void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif
156 
157 #if defined(PETSC_USE_COMPLEX)
158 #if defined(PETSC_COMPLEX_INSTANTIATE)
159 template <> class std::complex<double>; /* instantiate complex template class */
160 #endif
161 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
162 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
163 MPI_Datatype   MPI_C_COMPLEX;
164 #endif
165 PetscScalar    PETSC_i;
166 #else
167 PetscScalar    PETSC_i = 0.0;
168 #endif
169 #if defined(PETSC_USE_REAL___FLOAT128)
170 MPI_Datatype   MPIU___FLOAT128 = 0;
171 #endif
172 MPI_Datatype   MPIU_2SCALAR = 0;
173 MPI_Datatype   MPIU_2INT = 0;
174 
175 /*
176      These are needed by petscbt.h
177 */
178 #include <petscbt.h>
179 char      _BT_mask = ' ';
180 char      _BT_c = ' ';
181 PetscInt  _BT_idx  = 0;
182 
183 /*
184        Function that is called to display all error messages
185 */
186 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
187 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
188 #if defined(PETSC_HAVE_MATLAB_ENGINE)
189 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
190 #else
191 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
192 #endif
193 /*
194   This is needed to turn on/off cusp synchronization */
195 PetscBool   synchronizeCUSP = PETSC_FALSE;
196 
197 /* ------------------------------------------------------------------------------*/
198 /*
199    Optional file where all PETSc output from various prints is saved
200 */
201 FILE *petsc_history = PETSC_NULL;
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "PetscOpenHistoryFile"
205 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
206 {
207   PetscErrorCode ierr;
208   PetscMPIInt    rank,size;
209   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
210   char           version[256];
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
214   if (!rank) {
215     char        arch[10];
216     int         err;
217     PetscViewer viewer;
218 
219     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
220     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
221     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
222     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
223     if (filename) {
224       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
225     } else {
226       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
227       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
228       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
229     }
230 
231     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
232     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
233     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
234     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
235     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
236     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
237     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
238     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
239     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
240     err = fflush(*fd);
241     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
242   }
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "PetscCloseHistoryFile"
248 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
249 {
250   PetscErrorCode ierr;
251   PetscMPIInt    rank;
252   char           date[64];
253   int            err;
254 
255   PetscFunctionBegin;
256   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
257   if (!rank) {
258     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
259     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
260     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
261     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
262     err = fflush(*fd);
263     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
264     err = fclose(*fd);
265     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
266   }
267   PetscFunctionReturn(0);
268 }
269 
270 /* ------------------------------------------------------------------------------*/
271 
272 /*
273    This is ugly and probably belongs somewhere else, but I want to
274   be able to put a true MPI abort error handler with command line args.
275 
276     This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits thus providing no useful information
278   in the debugger hence we call abort() instead of MPI_Abort().
279 */
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "Petsc_MPI_AbortOnError"
283 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
284 {
285   PetscFunctionBegin;
286   (*PetscErrorPrintf)("MPI error %d\n",*flag);
287   abort();
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
292 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
293 {
294   PetscErrorCode ierr;
295 
296   PetscFunctionBegin;
297   (*PetscErrorPrintf)("MPI error %d\n",*flag);
298   ierr = PetscAttachDebugger();
299   if (ierr) { /* hopeless so get out */
300     MPI_Abort(*comm,*flag);
301   }
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "PetscEnd"
306 /*@C
307    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
308      wishes a clean exit somewhere deep in the program.
309 
310    Collective on PETSC_COMM_WORLD
311 
312    Options Database Keys are the same as for PetscFinalize()
313 
314    Level: advanced
315 
316    Note:
317    See PetscInitialize() for more general runtime options.
318 
319 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
320 @*/
321 PetscErrorCode  PetscEnd(void)
322 {
323   PetscFunctionBegin;
324   PetscFinalize();
325   exit(0);
326   return 0;
327 }
328 
329 PetscBool    PetscOptionsPublish = PETSC_FALSE;
330 extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
331 extern PetscBool  petscsetmallocvisited;
332 static char       emacsmachinename[256];
333 
334 PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
335 PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "PetscSetHelpVersionFunctions"
339 /*@C
340    PetscSetHelpVersionFunctions - Sets functions that print help and version information
341    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
342    This routine enables a "higher-level" package that uses PETSc to print its messages first.
343 
344    Input Parameter:
345 +  help - the help function (may be PETSC_NULL)
346 -  version - the version function (may be PETSC_NULL)
347 
348    Level: developer
349 
350    Concepts: package help message
351 
352 @*/
353 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
354 {
355   PetscFunctionBegin;
356   PetscExternalHelpFunction    = help;
357   PetscExternalVersionFunction = version;
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
363 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
364 {
365   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
366   MPI_Comm       comm = PETSC_COMM_WORLD;
367   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
368   PetscErrorCode ierr;
369   PetscReal      si;
370   int            i;
371   PetscMPIInt    rank;
372   char           version[256];
373 
374   PetscFunctionBegin;
375   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
376 
377   /*
378       Setup the memory management; support for tracing malloc() usage
379   */
380   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
381 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
382   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
383   if ((!flg2 || flg1) && !petscsetmallocvisited) {
384 #if defined(PETSC_HAVE_VALGRIND)
385     if (flg2 || !(RUNNING_ON_VALGRIND)) {
386       /* turn off default -malloc if valgrind is being used */
387 #endif
388       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
389 #if defined(PETSC_HAVE_VALGRIND)
390     }
391 #endif
392   }
393 #else
394   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
395   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
396   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
397 #endif
398   if (flg3) {
399     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
400   }
401   flg1 = PETSC_FALSE;
402   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
403   if (flg1) {
404     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
405     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
406   }
407 
408   flg1 = PETSC_FALSE;
409   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
410   if (!flg1) {
411     flg1 = PETSC_FALSE;
412     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
413   }
414   if (flg1) {
415     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
416   }
417 
418   /*
419       Set the display variable for graphics
420   */
421   ierr = PetscSetDisplay();CHKERRQ(ierr);
422 
423 
424   /*
425       Print the PETSc version information
426   */
427   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
428   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
429   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
430   if (flg1 || flg2 || flg3){
431 
432     /*
433        Print "higher-level" package version message
434     */
435     if (PetscExternalVersionFunction) {
436       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
437     }
438 
439     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
441 ------------------------------\n");CHKERRQ(ierr);
442     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
443     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
444     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
445     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
446     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
447     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
448     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
449 ------------------------------\n");CHKERRQ(ierr);
450   }
451 
452   /*
453        Print "higher-level" package help message
454   */
455   if (flg3){
456     if (PetscExternalHelpFunction) {
457       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
458     }
459   }
460 
461   /*
462       Setup the error handling
463   */
464   flg1 = PETSC_FALSE;
465   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
466   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
467   flg1 = PETSC_FALSE;
468   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
469   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
470   flg1 = PETSC_FALSE;
471   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
472   if (flg1) {
473     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
474   }
475   flg1 = PETSC_FALSE;
476   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
477   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
478   flg1 = PETSC_FALSE;
479   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
480   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
481 
482   /*
483       Setup debugger information
484   */
485   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
486   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
487   if (flg1) {
488     MPI_Errhandler err_handler;
489 
490     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
491     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
492     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
493     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
494   }
495   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
496   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
497   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
498   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
499   if (flg1 || flg2) {
500     PetscMPIInt    size;
501     PetscInt       lsize,*nodes;
502     MPI_Errhandler err_handler;
503     /*
504        we have to make sure that all processors have opened
505        connections to all other processors, otherwise once the
506        debugger has stated it is likely to receive a SIGUSR1
507        and kill the program.
508     */
509     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
510     if (size > 2) {
511       PetscMPIInt dummy = 0;
512       MPI_Status  status;
513       for (i=0; i<size; i++) {
514         if (rank != i) {
515           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
516         }
517       }
518       for (i=0; i<size; i++) {
519         if (rank != i) {
520           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
521         }
522       }
523     }
524     /* check if this processor node should be in debugger */
525     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
526     lsize = size;
527     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
528     if (flag) {
529       for (i=0; i<lsize; i++) {
530         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
531       }
532     }
533     if (!flag) {
534       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
535       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
536       if (flg1) {
537         ierr = PetscAttachDebugger();CHKERRQ(ierr);
538       } else {
539         ierr = PetscStopForDebugger();CHKERRQ(ierr);
540       }
541       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
542       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
543     }
544     ierr = PetscFree(nodes);CHKERRQ(ierr);
545   }
546 
547   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
548   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
549 
550 #if defined(PETSC_USE_SOCKET_VIEWER)
551   /*
552     Activates new sockets for zope if needed
553   */
554   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
555   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
556   if (flgz){
557     int  sockfd;
558     char hostname[256];
559     char username[256];
560     int  remoteport = 9999;
561 
562     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
563     if (!hostname[0]){
564       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
565     }
566     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
567     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
568     PETSC_ZOPEFD = fdopen(sockfd, "w");
569     if (flgzout){
570       PETSC_STDOUT = PETSC_ZOPEFD;
571       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
572       fprintf(PETSC_STDOUT, "<<<start>>>");
573     } else {
574       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
575       fprintf(PETSC_ZOPEFD, "<<<start>>>");
576     }
577   }
578 #endif
579 #if defined(PETSC_USE_SERVER)
580   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
581   if (flgz){
582     PetscInt port = PETSC_DECIDE;
583     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
584     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
585   }
586 #endif
587 
588   /*
589         Setup profiling and logging
590   */
591 #if defined (PETSC_USE_INFO)
592   {
593     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
594     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
595     if (flg1 && logname[0]) {
596       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
597     } else if (flg1) {
598       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
599     }
600   }
601 #endif
602 #if defined(PETSC_USE_LOG)
603   mname[0] = 0;
604   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
605   if (flg1) {
606     if (mname[0]) {
607       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
608     } else {
609       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
610     }
611   }
612 #if defined(PETSC_HAVE_MPE)
613   flg1 = PETSC_FALSE;
614   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
615   if (flg1) PetscLogMPEBegin();
616 #endif
617   flg1 = PETSC_FALSE;
618   flg2 = PETSC_FALSE;
619   flg3 = PETSC_FALSE;
620   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
621   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
622   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
623   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
624   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
625   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
626 
627   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
628   if (flg1) {
629     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
630     FILE *file;
631     if (mname[0]) {
632       sprintf(name,"%s.%d",mname,rank);
633       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
634       file = fopen(fname,"w");
635       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
636     } else {
637       file = PETSC_STDOUT;
638     }
639     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
640   }
641 #endif
642 
643   /*
644       Setup building of stack frames for all function calls
645   */
646 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
647   ierr = PetscStackCreate();CHKERRQ(ierr);
648 #endif
649 
650   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
651 
652 #if defined(PETSC_HAVE_PTHREADCLASSES)
653   /*
654       Determine whether user specified maximum number of threads
655    */
656   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
657 
658   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
659   if(flg1) {
660     cpu_set_t mset;
661     int icorr,ncorr = get_nprocs();
662     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
663     CPU_ZERO(&mset);
664     CPU_SET(icorr%ncorr,&mset);
665     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
666   }
667 
668   PetscInt N_CORES = get_nprocs();
669   ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
670   char tstr[9];
671   char tbuf[2];
672   strcpy(tstr,"-thread");
673   for(i=0;i<PetscMaxThreads;i++) {
674     ThreadCoreAffinity[i] = i;
675     sprintf(tbuf,"%d",i);
676     strcat(tstr,tbuf);
677     ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
678     if(flg1) {
679       ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
680       ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
681     }
682     tstr[7] = '\0';
683   }
684 
685   /*
686       Determine whether to use thread pool
687    */
688   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
689   if (flg1) {
690     PetscUseThreadPool = PETSC_TRUE;
691     /* get the thread pool type */
692     PetscInt ipool = 0;
693     const char *choices[4] = {"true","tree","main","chain"};
694 
695     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
696     switch(ipool) {
697     case 1:
698       PetscThreadFunc       = &PetscThreadFunc_Tree;
699       PetscThreadInitialize = &PetscThreadInitialize_Tree;
700       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
701       MainWait              = &MainWait_Tree;
702       MainJob               = &MainJob_Tree;
703       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
704       break;
705     case 2:
706       PetscThreadFunc       = &PetscThreadFunc_Main;
707       PetscThreadInitialize = &PetscThreadInitialize_Main;
708       PetscThreadFinalize   = &PetscThreadFinalize_Main;
709       MainWait              = &MainWait_Main;
710       MainJob               = &MainJob_Main;
711       PetscInfo(PETSC_NULL,"Using main thread pool\n");
712       break;
713 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
714     case 3:
715 #else
716     default:
717 #endif
718       PetscThreadFunc       = &PetscThreadFunc_Chain;
719       PetscThreadInitialize = &PetscThreadInitialize_Chain;
720       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
721       MainWait              = &MainWait_Chain;
722       MainJob               = &MainJob_Chain;
723       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
724       break;
725 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
726     default:
727       PetscThreadFunc       = &PetscThreadFunc_True;
728       PetscThreadInitialize = &PetscThreadInitialize_True;
729       PetscThreadFinalize   = &PetscThreadFinalize_True;
730       MainWait              = &MainWait_True;
731       MainJob               = &MainJob_True;
732       PetscInfo(PETSC_NULL,"Using true thread pool\n");
733       break;
734 #endif
735     }
736     PetscThreadInitialize(PetscMaxThreads);
737   } else {
738     //need to define these in the case on 'no threads' or 'thread create/destroy'
739     //could take any of the above versions
740     MainJob               = &MainJob_Spawn;
741   }
742 #endif
743   /*
744        Print basic help message
745   */
746   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
747   if (flg1) {
748     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
777     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
778     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
779     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
780     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
781     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
782 #if defined(PETSC_USE_LOG)
783     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
784     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
785     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
786 #if defined(PETSC_HAVE_MPE)
787     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
788 #endif
789     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
790 #endif
791     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
792     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
793     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
794     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
795   }
796 
797   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
798   if (flg1) {
799     ierr = PetscSleep(si);CHKERRQ(ierr);
800   }
801 
802   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
803   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
804   if (f) {
805     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
806   }
807 
808 #if defined(PETSC_HAVE_CUSP)
809   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
810   if (flg3) flg1 = PETSC_TRUE;
811   else flg1 = PETSC_FALSE;
812   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
813   if (flg1) synchronizeCUSP = PETSC_TRUE;
814 #endif
815 
816   PetscFunctionReturn(0);
817 }
818 
819 #if defined(PETSC_HAVE_PTHREADCLASSES)
820 
821 /**** 'Tree' Thread Pool Functions ****/
822 void* PetscThreadFunc_Tree(void* arg) {
823   PetscErrorCode iterr;
824   int icorr,ierr;
825   int* pId = (int*)arg;
826   int ThreadId = *pId,Mary = 2,i,SubWorker;
827   PetscBool PeeOn;
828   cpu_set_t mset;
829   //printf("Thread %d In Tree Thread Function\n",ThreadId);
830   icorr = ThreadCoreAffinity[ThreadId];
831   CPU_ZERO(&mset);
832   CPU_SET(icorr,&mset);
833   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
834 
835   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
836     PeeOn = PETSC_TRUE;
837   }
838   else {
839     PeeOn = PETSC_FALSE;
840   }
841   if(PeeOn==PETSC_FALSE) {
842     /* check your subordinates, wait for them to be ready */
843     for(i=1;i<=Mary;i++) {
844       SubWorker = Mary*ThreadId+i;
845       if(SubWorker<PetscMaxThreads) {
846         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
847         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
848           /* upon entry, automically releases the lock and blocks
849            upon return, has the lock */
850           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
851         }
852         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
853       }
854     }
855     /* your subordinates are now ready */
856   }
857   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
858   /* update your ready status */
859   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
860   if(ThreadId==0) {
861     job_tree.eJobStat = JobCompleted;
862     /* ignal main */
863     ierr = pthread_cond_signal(&main_cond);
864   }
865   else {
866     /* tell your boss that you're ready to work */
867     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
868   }
869   /* the while loop needs to have an exit
870   the 'main' thread can terminate all the threads by performing a broadcast
871    and calling FuncFinish */
872   while(PetscThreadGo) {
873     /*need to check the condition to ensure we don't have to wait
874       waiting when you don't have to causes problems
875      also need to check the condition to ensure proper handling of spurious wakeups */
876     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
877       /* upon entry, automically releases the lock and blocks
878        upon return, has the lock */
879         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
880 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
881 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
882     }
883     if(ThreadId==0) {
884       job_tree.startJob = PETSC_FALSE;
885       job_tree.eJobStat = ThreadsWorking;
886     }
887     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
888     if(PeeOn==PETSC_FALSE) {
889       /* tell your subordinates it's time to get to work */
890       for(i=1; i<=Mary; i++) {
891 	SubWorker = Mary*ThreadId+i;
892         if(SubWorker<PetscMaxThreads) {
893           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
894         }
895       }
896     }
897     /* do your job */
898     if(job_tree.pdata==NULL) {
899       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
900     }
901     else {
902       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
903     }
904     if(iterr!=0) {
905       ithreaderr = 1;
906     }
907     if(PetscThreadGo) {
908       /* reset job, get ready for more */
909       if(PeeOn==PETSC_FALSE) {
910         /* check your subordinates, waiting for them to be ready
911          how do you know for a fact that a given subordinate has actually started? */
912 	for(i=1;i<=Mary;i++) {
913 	  SubWorker = Mary*ThreadId+i;
914           if(SubWorker<PetscMaxThreads) {
915             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
916             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
917               /* upon entry, automically releases the lock and blocks
918                upon return, has the lock */
919               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
920             }
921             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
922           }
923 	}
924         /* your subordinates are now ready */
925       }
926       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
927       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
928       if(ThreadId==0) {
929 	job_tree.eJobStat = JobCompleted; /* oot thread: last thread to complete, guaranteed! */
930         /* root thread signals 'main' */
931         ierr = pthread_cond_signal(&main_cond);
932       }
933       else {
934         /* signal your boss before you go to sleep */
935         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
936       }
937     }
938   }
939   return NULL;
940 }
941 
942 #undef __FUNCT__
943 #define __FUNCT__ "PetscThreadInitialize_Tree"
944 void* PetscThreadInitialize_Tree(PetscInt N) {
945   PetscInt i,ierr;
946   int status;
947 
948   if(PetscUseThreadPool) {
949     size_t Val1 = (size_t)CACHE_LINE_SIZE;
950     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
951     arrmutex = (char*)memalign(Val1,Val2);
952     arrcond1 = (char*)memalign(Val1,Val2);
953     arrcond2 = (char*)memalign(Val1,Val2);
954     arrstart = (char*)memalign(Val1,Val2);
955     arrready = (char*)memalign(Val1,Val2);
956     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
957     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
958     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
959     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
960     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
961     /* initialize job structure */
962     for(i=0; i<PetscMaxThreads; i++) {
963       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
964       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
965       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
966       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
967       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
968     }
969     for(i=0; i<PetscMaxThreads; i++) {
970       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
971       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
972       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
973       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
974       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
975     }
976     job_tree.pfunc = NULL;
977     job_tree.pdata = (void**)malloc(N*sizeof(void*));
978     job_tree.startJob = PETSC_FALSE;
979     job_tree.eJobStat = JobInitiated;
980     pVal = (int*)malloc(N*sizeof(int));
981     /* allocate memory in the heap for the thread structure */
982     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
983     /* create threads */
984     for(i=0; i<N; i++) {
985       pVal[i] = i;
986       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
987       /* should check status */
988     }
989   }
990   return NULL;
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "PetscThreadFinalize_Tree"
995 PetscErrorCode PetscThreadFinalize_Tree() {
996   int i,ierr;
997   void* jstatus;
998 
999   PetscFunctionBegin;
1000 
1001   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1002   /* join the threads */
1003   for(i=0; i<PetscMaxThreads; i++) {
1004     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1005     /* do error checking*/
1006   }
1007   free(PetscThreadPoint);
1008   free(arrmutex);
1009   free(arrcond1);
1010   free(arrcond2);
1011   free(arrstart);
1012   free(arrready);
1013   free(job_tree.pdata);
1014   free(pVal);
1015 
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MainWait_Tree"
1021 void MainWait_Tree() {
1022   int ierr;
1023   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1024   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1025     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1026   }
1027   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MainJob_Tree"
1032 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1033   int i,ierr;
1034   PetscErrorCode ijoberr = 0;
1035 
1036   MainWait();
1037   job_tree.pfunc = pFunc;
1038   job_tree.pdata = data;
1039   job_tree.startJob = PETSC_TRUE;
1040   for(i=0; i<PetscMaxThreads; i++) {
1041     *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1042   }
1043   job_tree.eJobStat = JobInitiated;
1044   ierr = pthread_cond_signal(job_tree.cond2array[0]);
1045   if(pFunc!=FuncFinish) {
1046     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1047   }
1048 
1049   if(ithreaderr) {
1050     ijoberr = ithreaderr;
1051   }
1052   return ijoberr;
1053 }
1054 /****  ****/
1055 
1056 /**** 'Main' Thread Pool Functions ****/
1057 void* PetscThreadFunc_Main(void* arg) {
1058   PetscErrorCode iterr;
1059   int icorr,ierr;
1060   int* pId = (int*)arg;
1061   int ThreadId = *pId;
1062   cpu_set_t mset;
1063   //printf("Thread %d In Main Thread Function\n",ThreadId);
1064   icorr = ThreadCoreAffinity[ThreadId];
1065   CPU_ZERO(&mset);
1066   CPU_SET(icorr,&mset);
1067   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1068 
1069   ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1070   /* update your ready status */
1071   *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1072   /* tell the BOSS that you're ready to work before you go to sleep */
1073   ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1074 
1075   /* the while loop needs to have an exit
1076      the 'main' thread can terminate all the threads by performing a broadcast
1077      and calling FuncFinish */
1078   while(PetscThreadGo) {
1079     /* need to check the condition to ensure we don't have to wait
1080        waiting when you don't have to causes problems
1081      also need to check the condition to ensure proper handling of spurious wakeups */
1082     while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
1083       /* upon entry, atomically releases the lock and blocks
1084        upon return, has the lock */
1085         ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
1086 	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
1087     }
1088     ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
1089     if(job_main.pdata==NULL) {
1090       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
1091     }
1092     else {
1093       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
1094     }
1095     if(iterr!=0) {
1096       ithreaderr = 1;
1097     }
1098     if(PetscThreadGo) {
1099       /* reset job, get ready for more */
1100       ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1101       *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1102       /* tell the BOSS that you're ready to work before you go to sleep */
1103       ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1104     }
1105   }
1106   return NULL;
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "PetscThreadInitialize_Main"
1111 void* PetscThreadInitialize_Main(PetscInt N) {
1112   PetscInt i,ierr;
1113   int status;
1114 
1115   if(PetscUseThreadPool) {
1116     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1117     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1118     arrmutex = (char*)memalign(Val1,Val2);
1119     arrcond1 = (char*)memalign(Val1,Val2);
1120     arrcond2 = (char*)memalign(Val1,Val2);
1121     arrstart = (char*)memalign(Val1,Val2);
1122     arrready = (char*)memalign(Val1,Val2);
1123     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1124     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1125     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1126     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1127     /* initialize job structure */
1128     for(i=0; i<PetscMaxThreads; i++) {
1129       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1130       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1131       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1132       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1133     }
1134     for(i=0; i<PetscMaxThreads; i++) {
1135       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1136       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1137       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1138       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1139     }
1140     job_main.pfunc = NULL;
1141     job_main.pdata = (void**)malloc(N*sizeof(void*));
1142     pVal = (int*)malloc(N*sizeof(int));
1143     /* allocate memory in the heap for the thread structure */
1144     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1145     /* create threads */
1146     for(i=0; i<N; i++) {
1147       pVal[i] = i;
1148       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1149       /* error check */
1150     }
1151   }
1152   else {
1153   }
1154   return NULL;
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "PetscThreadFinalize_Main"
1159 PetscErrorCode PetscThreadFinalize_Main() {
1160   int i,ierr;
1161   void* jstatus;
1162 
1163   PetscFunctionBegin;
1164 
1165   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1166   /* join the threads */
1167   for(i=0; i<PetscMaxThreads; i++) {
1168     ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1169   }
1170   free(PetscThreadPoint);
1171   free(arrmutex);
1172   free(arrcond1);
1173   free(arrcond2);
1174   free(arrstart);
1175   free(arrready);
1176   free(job_main.pdata);
1177   free(pVal);
1178 
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MainWait_Main"
1184 void MainWait_Main() {
1185   int i,ierr;
1186   for(i=0; i<PetscMaxThreads; i++) {
1187     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1188     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1189       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1190     }
1191     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1192   }
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MainJob_Main"
1197 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1198   int i,ierr;
1199   PetscErrorCode ijoberr = 0;
1200 
1201   MainWait(); /* you know everyone is waiting to be signalled! */
1202   job_main.pfunc = pFunc;
1203   job_main.pdata = data;
1204   for(i=0; i<PetscMaxThreads; i++) {
1205     *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1206   }
1207   /* tell the threads to go to work */
1208   for(i=0; i<PetscMaxThreads; i++) {
1209     ierr = pthread_cond_signal(job_main.cond2array[i]);
1210   }
1211   if(pFunc!=FuncFinish) {
1212     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1213   }
1214 
1215   if(ithreaderr) {
1216     ijoberr = ithreaderr;
1217   }
1218   return ijoberr;
1219 }
1220 /****  ****/
1221 
1222 /**** Chain Thread Functions ****/
1223 void* PetscThreadFunc_Chain(void* arg) {
1224   PetscErrorCode iterr;
1225   int icorr,ierr;
1226   int* pId = (int*)arg;
1227   int ThreadId = *pId;
1228   int SubWorker = ThreadId + 1;
1229   PetscBool PeeOn;
1230   cpu_set_t mset;
1231   //printf("Thread %d In Chain Thread Function\n",ThreadId);
1232   icorr = ThreadCoreAffinity[ThreadId];
1233   CPU_ZERO(&mset);
1234   CPU_SET(icorr,&mset);
1235   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1236 
1237   if(ThreadId==(PetscMaxThreads-1)) {
1238     PeeOn = PETSC_TRUE;
1239   }
1240   else {
1241     PeeOn = PETSC_FALSE;
1242   }
1243   if(PeeOn==PETSC_FALSE) {
1244     /* check your subordinate, wait for him to be ready */
1245     ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1246     while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
1247       /* upon entry, automically releases the lock and blocks
1248        upon return, has the lock */
1249       ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1250     }
1251     ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1252     /* your subordinate is now ready*/
1253   }
1254   ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1255   /* update your ready status */
1256   *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1257   if(ThreadId==0) {
1258     job_chain.eJobStat = JobCompleted;
1259     /* signal main */
1260     ierr = pthread_cond_signal(&main_cond);
1261   }
1262   else {
1263     /* tell your boss that you're ready to work */
1264     ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1265   }
1266   /*  the while loop needs to have an exit
1267      the 'main' thread can terminate all the threads by performing a broadcast
1268    and calling FuncFinish */
1269   while(PetscThreadGo) {
1270     /* need to check the condition to ensure we don't have to wait
1271        waiting when you don't have to causes problems
1272      also need to check the condition to ensure proper handling of spurious wakeups */
1273     while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
1274       /*upon entry, automically releases the lock and blocks
1275        upon return, has the lock */
1276         ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
1277 	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
1278 	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
1279     }
1280     if(ThreadId==0) {
1281       job_chain.startJob = PETSC_FALSE;
1282       job_chain.eJobStat = ThreadsWorking;
1283     }
1284     ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
1285     if(PeeOn==PETSC_FALSE) {
1286       /* tell your subworker it's time to get to work */
1287       ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
1288     }
1289     /* do your job */
1290     if(job_chain.pdata==NULL) {
1291       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
1292     }
1293     else {
1294       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
1295     }
1296     if(iterr!=0) {
1297       ithreaderr = 1;
1298     }
1299     if(PetscThreadGo) {
1300       /* reset job, get ready for more */
1301       if(PeeOn==PETSC_FALSE) {
1302         /* check your subordinate, wait for him to be ready
1303          how do you know for a fact that your subordinate has actually started? */
1304         ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1305         while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
1306           /* upon entry, automically releases the lock and blocks
1307            upon return, has the lock */
1308           ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1309         }
1310         ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1311         /* your subordinate is now ready */
1312       }
1313       ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1314       *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1315       if(ThreadId==0) {
1316 	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
1317         /* root thread (foreman) signals 'main' */
1318         ierr = pthread_cond_signal(&main_cond);
1319       }
1320       else {
1321         /* signal your boss before you go to sleep */
1322         ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1323       }
1324     }
1325   }
1326   return NULL;
1327 }
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "PetscThreadInitialize_Chain"
1331 void* PetscThreadInitialize_Chain(PetscInt N) {
1332   PetscInt i,ierr;
1333   int status;
1334 
1335   if(PetscUseThreadPool) {
1336     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1337     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1338     arrmutex = (char*)memalign(Val1,Val2);
1339     arrcond1 = (char*)memalign(Val1,Val2);
1340     arrcond2 = (char*)memalign(Val1,Val2);
1341     arrstart = (char*)memalign(Val1,Val2);
1342     arrready = (char*)memalign(Val1,Val2);
1343     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1344     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1345     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1346     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1347     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1348     /* initialize job structure */
1349     for(i=0; i<PetscMaxThreads; i++) {
1350       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1351       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1352       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1353       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1354       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1355     }
1356     for(i=0; i<PetscMaxThreads; i++) {
1357       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1358       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1359       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1360       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1361       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1362     }
1363     job_chain.pfunc = NULL;
1364     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1365     job_chain.startJob = PETSC_FALSE;
1366     job_chain.eJobStat = JobInitiated;
1367     pVal = (int*)malloc(N*sizeof(int));
1368     /* allocate memory in the heap for the thread structure */
1369     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1370     /* create threads */
1371     for(i=0; i<N; i++) {
1372       pVal[i] = i;
1373       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1374       /* should check error */
1375     }
1376   }
1377   else {
1378   }
1379   return NULL;
1380 }
1381 
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "PetscThreadFinalize_Chain"
1385 PetscErrorCode PetscThreadFinalize_Chain() {
1386   int i,ierr;
1387   void* jstatus;
1388 
1389   PetscFunctionBegin;
1390 
1391   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1392   /* join the threads */
1393   for(i=0; i<PetscMaxThreads; i++) {
1394     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1395     /* should check error */
1396   }
1397   free(PetscThreadPoint);
1398   free(arrmutex);
1399   free(arrcond1);
1400   free(arrcond2);
1401   free(arrstart);
1402   free(arrready);
1403   free(job_chain.pdata);
1404   free(pVal);
1405 
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MainWait_Chain"
1411 void MainWait_Chain() {
1412   int ierr;
1413   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1414   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1415     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1416   }
1417   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MainJob_Chain"
1422 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1423   int i,ierr;
1424   PetscErrorCode ijoberr = 0;
1425 
1426   MainWait();
1427   job_chain.pfunc = pFunc;
1428   job_chain.pdata = data;
1429   job_chain.startJob = PETSC_TRUE;
1430   for(i=0; i<PetscMaxThreads; i++) {
1431     *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1432   }
1433   job_chain.eJobStat = JobInitiated;
1434   ierr = pthread_cond_signal(job_chain.cond2array[0]);
1435   if(pFunc!=FuncFinish) {
1436     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1437   }
1438 
1439   if(ithreaderr) {
1440     ijoberr = ithreaderr;
1441   }
1442   return ijoberr;
1443 }
1444 /****  ****/
1445 
1446 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1447 /**** True Thread Functions ****/
1448 void* PetscThreadFunc_True(void* arg) {
1449   int icorr,ierr,iVal;
1450   int* pId = (int*)arg;
1451   int ThreadId = *pId;
1452   PetscErrorCode iterr;
1453   cpu_set_t mset;
1454   //printf("Thread %d In True Pool Thread Function\n",ThreadId);
1455   icorr = ThreadCoreAffinity[ThreadId];
1456   CPU_ZERO(&mset);
1457   CPU_SET(icorr,&mset);
1458   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1459 
1460   ierr = pthread_mutex_lock(&job_true.mutex);
1461   job_true.iNumReadyThreads++;
1462   if(job_true.iNumReadyThreads==PetscMaxThreads) {
1463     ierr = pthread_cond_signal(&main_cond);
1464   }
1465   /*the while loop needs to have an exit
1466     the 'main' thread can terminate all the threads by performing a broadcast
1467    and calling FuncFinish */
1468   while(PetscThreadGo) {
1469     /*need to check the condition to ensure we don't have to wait
1470       waiting when you don't have to causes problems
1471      also need to wait if another thread sneaks in and messes with the predicate */
1472     while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
1473       /* upon entry, automically releases the lock and blocks
1474        upon return, has the lock */
1475       //printf("Thread %d Going to Sleep!\n",ThreadId);
1476       ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
1477     }
1478     job_true.startJob = PETSC_FALSE;
1479     job_true.iNumJobThreads--;
1480     job_true.iNumReadyThreads--;
1481     iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
1482     pthread_mutex_unlock(&job_true.mutex);
1483     if(job_true.pdata==NULL) {
1484       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
1485     }
1486     else {
1487       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
1488     }
1489     if(iterr!=0) {
1490       ithreaderr = 1;
1491     }
1492     //printf("Thread %d Finished Job\n",ThreadId);
1493     /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
1494       what happens if a thread finishes before they all start? BAD!
1495      what happens if a thread finishes before any else start? BAD! */
1496     pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
1497     /* reset job */
1498     if(PetscThreadGo) {
1499       pthread_mutex_lock(&job_true.mutex);
1500       job_true.iNumReadyThreads++;
1501       if(job_true.iNumReadyThreads==PetscMaxThreads) {
1502 	/* signal the 'main' thread that the job is done! (only done once) */
1503 	ierr = pthread_cond_signal(&main_cond);
1504       }
1505     }
1506   }
1507   return NULL;
1508 }
1509 
1510 #undef __FUNCT__
1511 #define __FUNCT__ "PetscThreadInitialize_True"
1512 void* PetscThreadInitialize_True(PetscInt N) {
1513   PetscInt i;
1514   int status;
1515 
1516   pVal = (int*)malloc(N*sizeof(int));
1517   /* allocate memory in the heap for the thread structure */
1518   PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1519   BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1520   job_true.pdata = (void**)malloc(N*sizeof(void*));
1521   for(i=0; i<N; i++) {
1522     pVal[i] = i;
1523     status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1524     /* error check to ensure proper thread creation */
1525     status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1526     /* should check error */
1527   }
1528   //printf("Finished True Thread Pool Initialization\n");
1529   return NULL;
1530 }
1531 
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "PetscThreadFinalize_True"
1535 PetscErrorCode PetscThreadFinalize_True() {
1536   int i,ierr;
1537   void* jstatus;
1538 
1539   PetscFunctionBegin;
1540 
1541   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1542   /* join the threads */
1543   for(i=0; i<PetscMaxThreads; i++) {
1544     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1545   }
1546   free(BarrPoint);
1547   free(PetscThreadPoint);
1548 
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MainWait_True"
1554 void MainWait_True() {
1555   int ierr;
1556   ierr = pthread_mutex_lock(&job_true.mutex);
1557   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1558     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1559   }
1560   ierr = pthread_mutex_unlock(&job_true.mutex);
1561 }
1562 
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MainJob_True"
1565 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1566   int ierr;
1567   PetscErrorCode ijoberr = 0;
1568 
1569   MainWait();
1570   job_true.pfunc = pFunc;
1571   job_true.pdata = data;
1572   job_true.pbarr = &BarrPoint[n];
1573   job_true.iNumJobThreads = n;
1574   job_true.startJob = PETSC_TRUE;
1575   ierr = pthread_cond_broadcast(&job_true.cond);
1576   if(pFunc!=FuncFinish) {
1577     MainWait(); /* why wait after? guarantees that job gets done */
1578   }
1579 
1580   if(ithreaderr) {
1581     ijoberr = ithreaderr;
1582   }
1583   return ijoberr;
1584 }
1585 /**** NO THREAD POOL FUNCTION ****/
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MainJob_Spawn"
1588 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1589   PetscErrorCode ijoberr = 0;
1590 
1591   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1592   PetscThreadPoint = apThread; /* point to same place */
1593   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1594   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1595   free(apThread);
1596 
1597   return ijoberr;
1598 }
1599 /****  ****/
1600 #endif
1601 
1602 void* FuncFinish(void* arg) {
1603   PetscThreadGo = PETSC_FALSE;
1604   return(0);
1605 }
1606 
1607 #endif
1608