xref: /petsc/src/sys/objects/init.c (revision cd723cd1e71baf63298996d3aa19d25195c37816)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 //#if defined(PETSC_HAVE_SCHED_H) && defined(PETSC_USE_PTHREAD)
9 #ifndef _GNU_SOURCE
10 #define _GNU_SOURCE
11 #endif
12 #if defined(PETSC_HAVE_SCHED_H)
13 #include <sched.h>
14 #endif
15 #include <petscsys.h>        /*I  "petscsys.h"   I*/
16 #if defined(PETSC_USE_PTHREAD)
17 #include <pthread.h>
18 #endif
19 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
20 #include <sys/sysinfo.h>
21 #endif
22 #include <unistd.h>
23 #if defined(PETSC_HAVE_STDLIB_H)
24 #include <stdlib.h>
25 #endif
26 #if defined(PETSC_HAVE_MALLOC_H)
27 #include <malloc.h>
28 #endif
29 #if defined(PETSC_HAVE_VALGRIND)
30 #include <valgrind/valgrind.h>
31 #endif
32 
33 /* ------------------------Nasty global variables -------------------------------*/
34 /*
35      Indicates if PETSc started up MPI, or it was
36    already started before PETSc was initialized.
37 */
38 PetscBool    PetscBeganMPI         = PETSC_FALSE;  /* PETSC_TRUE if PETSc itself started MPI (see the comment above) */
39 PetscBool    PetscInitializeCalled = PETSC_FALSE;  /* presumably set once PetscInitialize() has run; not assigned in the visible code -- confirm */
40 PetscBool    PetscFinalizeCalled   = PETSC_FALSE;  /* presumably set once PetscFinalize() has run; not assigned in the visible code -- confirm */
41 PetscBool    PetscUseThreadPool    = PETSC_FALSE;  /* set to PETSC_TRUE by PetscOptionsCheckInitial_Private() when -use_thread_pool is given */
42 PetscBool    PetscThreadGo         = PETSC_TRUE;   /* NOTE(review): not referenced in the visible code; looks like a run flag for pool threads -- confirm */
43 PetscMPIInt  PetscGlobalRank = -1;                 /* -1 until assigned; presumably the rank in PETSC_COMM_WORLD -- confirm */
44 PetscMPIInt  PetscGlobalSize = -1;                 /* -1 until assigned; presumably the size of PETSC_COMM_WORLD -- confirm */
45 
46 #if defined(PETSC_HAVE_PTHREADCLASSES)
47 PetscMPIInt  PetscMaxThreads = 2;   /* default thread count; overwritten by -thread_max. NOTE(review): its address is passed to PetscOptionsGetInt(); verify PetscMPIInt and PetscInt have the same width in every build */
48 pthread_t*   PetscThreadPoint;      /* presumably the array of worker thread handles -- confirm */
49 #define PETSC_HAVE_PTHREAD_BARRIER  /* NOTE(review): defined unconditionally here, so every PETSC_HAVE_PTHREAD_BARRIER guard in this file is always taken -- confirm this override is intended */
50 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
51 pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
52 #endif
53 PetscErrorCode ithreaderr = 0;      /* presumably an error code shared with worker threads -- confirm */
54 int*         pVal;                  /* NOTE(review): purpose not visible in this part of the file */
55 
56 #define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
57 int* ThreadCoreAffinity;            /* core index per thread; allocated with get_nprocs() entries in PetscOptionsCheckInitial_Private() but filled for indices 0..PetscMaxThreads-1 -- NOTE(review): verify bounds when threads outnumber cores */
58 
59 typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */
60 
61 typedef struct {                      /* job descriptor type for the 'tree' thread pool (going by its name); member list is identical to sjob_chain */
62   pthread_mutex_t** mutexarray;       /* array of mutex pointers; presumably one per thread -- confirm */
63   pthread_cond_t**  cond1array;       /* array of condition-variable pointers; role of cond1 vs cond2 is not visible here */
64   pthread_cond_t** cond2array;
65   void* (*pfunc)(void*);              /* routine to run: takes a void* argument and returns void* */
66   void** pdata;                       /* presumably one argument pointer per thread for pfunc -- confirm */
67   PetscBool startJob;
68   estat eJobStat;                     /* one of JobInitiated, ThreadsWorking, JobCompleted */
69   PetscBool** arrThreadStarted;       /* presumably per-thread "started" flags -- confirm */
70   PetscBool** arrThreadReady;         /* presumably per-thread "ready" flags -- confirm */
71 } sjob_tree;
72 sjob_tree job_tree;                   /* the single file-scope instance of sjob_tree */
73 typedef struct {                      /* job descriptor type for the 'main' thread pool (going by its name); unlike sjob_tree/sjob_chain it has no startJob, eJobStat or arrThreadStarted members */
74   pthread_mutex_t** mutexarray;       /* array of mutex pointers; presumably one per thread -- confirm */
75   pthread_cond_t**  cond1array;       /* array of condition-variable pointers; role of cond1 vs cond2 is not visible here */
76   pthread_cond_t** cond2array;
77   void* (*pfunc)(void*);              /* routine to run: takes a void* argument and returns void* */
78   void** pdata;                       /* presumably one argument pointer per thread for pfunc -- confirm */
79   PetscBool** arrThreadReady;         /* presumably per-thread "ready" flags -- confirm */
80 } sjob_main;
81 sjob_main job_main;                   /* the single file-scope instance of sjob_main */
82 typedef struct {                      /* job descriptor type for the 'chain' thread pool (going by its name); member list is identical to sjob_tree */
83   pthread_mutex_t** mutexarray;       /* array of mutex pointers; presumably one per thread -- confirm */
84   pthread_cond_t**  cond1array;       /* array of condition-variable pointers; role of cond1 vs cond2 is not visible here */
85   pthread_cond_t** cond2array;
86   void* (*pfunc)(void*);              /* routine to run: takes a void* argument and returns void* */
87   void** pdata;                       /* presumably one argument pointer per thread for pfunc -- confirm */
88   PetscBool startJob;
89   estat eJobStat;                     /* one of JobInitiated, ThreadsWorking, JobCompleted */
90   PetscBool** arrThreadStarted;       /* presumably per-thread "started" flags -- confirm */
91   PetscBool** arrThreadReady;         /* presumably per-thread "ready" flags -- confirm */
92 } sjob_chain;
93 sjob_chain job_chain;                 /* the single file-scope instance of sjob_chain */
94 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
95 typedef struct {                      /* job descriptor type for the 'true' thread pool (going by its name); holds one mutex/condition pair by value rather than per-thread arrays */
96   pthread_mutex_t mutex;
97   pthread_cond_t cond;
98   void* (*pfunc)(void*);              /* routine to run: takes a void* argument and returns void* */
99   void** pdata;                       /* presumably one argument pointer per thread for pfunc -- confirm */
100   pthread_barrier_t* pbarr;
101   int iNumJobThreads;
102   int iNumReadyThreads;
103   PetscBool startJob;
104 } sjob_true;
105 sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};  /* positional initializer, in member order: mutex,cond,pfunc,pdata,pbarr,iNumJobThreads,iNumReadyThreads,startJob */
106 #endif
107 
108 pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
109 char* arrmutex; /* used by 'chain','main','tree' thread pools */
110 char* arrcond1; /* used by 'chain','main','tree' thread pools */
111 char* arrcond2; /* used by 'chain','main','tree' thread pools */
112 char* arrstart; /* used by 'chain','main','tree' thread pools */
113 char* arrready; /* used by 'chain','main','tree' thread pools */
114 
115 /* Function Pointers: bound in PetscOptionsCheckInitial_Private() to the _Tree/_Main/_Chain/_True routines declared below, according to the value of -use_thread_pool */
116 void*          (*PetscThreadFunc)(void*) = NULL;
117 void*          (*PetscThreadInitialize)(PetscInt) = NULL;                   /* called with PetscMaxThreads right after a pool type is selected */
118 PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
119 void           (*MainWait)(void) = NULL;
120 PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;    /* set to MainJob_Spawn when -use_thread_pool is not given */
121 /**** Tree Thread Pool Functions ****/
122 void*          PetscThreadFunc_Tree(void*);
123 void*          PetscThreadInitialize_Tree(PetscInt);
124 PetscErrorCode PetscThreadFinalize_Tree(void);
125 void           MainWait_Tree(void);
126 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
127 /**** Main Thread Pool Functions ****/
128 void*          PetscThreadFunc_Main(void*);
129 void*          PetscThreadInitialize_Main(PetscInt);
130 PetscErrorCode PetscThreadFinalize_Main(void);
131 void           MainWait_Main(void);
132 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
133 /**** Chain Thread Pool Functions ****/
134 void*          PetscThreadFunc_Chain(void*);
135 void*          PetscThreadInitialize_Chain(PetscInt);
136 PetscErrorCode PetscThreadFinalize_Chain(void);
137 void           MainWait_Chain(void);
138 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
139 /**** True Thread Pool Functions ****/
140 void*          PetscThreadFunc_True(void*);
141 void*          PetscThreadInitialize_True(PetscInt);
142 PetscErrorCode PetscThreadFinalize_True(void);
143 void           MainWait_True(void);
144 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
145 /**** NO Thread Pool Function  ****/
146 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);
147 /**** Helpers declared here; their definitions and uses are not in the visible part of this file ****/
148 void* FuncFinish(void*);
149 void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
150 void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
151 #endif
152 
153 #if defined(PETSC_USE_COMPLEX)
154 #if defined(PETSC_COMPLEX_INSTANTIATE)
155 template <> class std::complex<double>; /* instantiate complex template class (C++ only) */
156 #endif
157 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
158 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;   /* defined here only when PETSC_HAVE_MPI_C_DOUBLE_COMPLEX is not set */
159 MPI_Datatype   MPI_C_COMPLEX;          /* likewise supplied by PETSc under the same condition */
160 #endif
161 PetscScalar    PETSC_i;                /* complex builds: no initializer here; presumably assigned during initialization -- confirm */
162 #else
163 PetscScalar    PETSC_i = 0.0;          /* non-complex builds: defined with the value 0.0 */
164 #endif
165 #if defined(PETSC_USE_REAL___FLOAT128)
166 MPI_Datatype   MPIU___FLOAT128 = 0;    /* 0 here; presumably created during initialization -- confirm */
167 #endif
168 MPI_Datatype   MPIU_2SCALAR = 0;       /* 0 here; presumably created during initialization -- confirm */
169 MPI_Datatype   MPIU_2INT = 0;          /* 0 here; presumably created during initialization -- confirm */
170 
171 /*
172      These are needed by petscbt.h
173 */
174 #include <petscbt.h>
175 char      _BT_mask = ' ';   /* file-scope variable required by petscbt.h (per the comment above); presumably scratch storage for its macros -- confirm */
176 char      _BT_c = ' ';      /* as above */
177 PetscInt  _BT_idx  = 0;     /* as above */
178 
179 /*
180        Function that is called to display all error messages
181 */
182 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;   /* used by the MPI error handlers below to report the MPI error code */
183 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;    /* used below to print the version banner and the -help text */
184 #if defined(PETSC_HAVE_MATLAB_ENGINE)
185 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;      /* Matlab-specific default when PETSC_HAVE_MATLAB_ENGINE is defined */
186 #else
187 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;      /* default otherwise */
188 #endif
189 /*
190   This is needed to turn on/off cusp synchronization */
191 PetscBool   synchronizeCUSP = PETSC_FALSE;
192 
193 /* ------------------------------------------------------------------------------*/
194 /*
195    Optional file where all PETSc output from various prints is saved
196 */
197 FILE *petsc_history = PETSC_NULL;
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "PetscOpenHistoryFile"
201 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
202 {
203   PetscErrorCode ierr;
204   PetscMPIInt    rank,size;
205   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
206   char           version[256];
207 
208   PetscFunctionBegin;
209   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
210   if (!rank) {
211     char        arch[10];
212     int         err;
213     PetscViewer viewer;
214 
215     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
216     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
217     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
218     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
219     if (filename) {
220       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
221     } else {
222       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
223       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
224       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
225     }
226 
227     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
228     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
229     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
230     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
231     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
232     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
233     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
234     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
235     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
236     err = fflush(*fd);
237     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "PetscCloseHistoryFile"
244 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
245 {
246   PetscErrorCode ierr;
247   PetscMPIInt    rank;
248   char           date[64];
249   int            err;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
253   if (!rank) {
254     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
255     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
256     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
257     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
258     err = fflush(*fd);
259     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
260     err = fclose(*fd);
261     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
262   }
263   PetscFunctionReturn(0);
264 }
265 
266 /* ------------------------------------------------------------------------------*/
267 
268 /*
269    This is ugly and probably belongs somewhere else, but I want to
270   be able to put a true MPI abort error handler with command line args.
271 
272     This is so MPI errors in the debugger will leave all the stack
273   frames. The default MPI_Abort() cleans up and exits, thus providing no useful information
274   in the debugger hence we call abort() instead of MPI_Abort().
275 */
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "Petsc_MPI_AbortOnError"
279 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
280 {
281   PetscFunctionBegin;
282   (*PetscErrorPrintf)("MPI error %d\n",*flag);
283   abort();
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
288 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
289 {
290   PetscErrorCode ierr;
291 
292   PetscFunctionBegin;
293   (*PetscErrorPrintf)("MPI error %d\n",*flag);
294   ierr = PetscAttachDebugger();
295   if (ierr) { /* hopeless so get out */
296     MPI_Abort(*comm,*flag);
297   }
298 }
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "PetscEnd"
302 /*@C
303    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
304      wishes a clean exit somewhere deep in the program.
305 
306    Collective on PETSC_COMM_WORLD
307 
308    Options Database Keys are the same as for PetscFinalize()
309 
310    Level: advanced
311 
312    Note:
313    See PetscInitialize() for more general runtime options.
314 
315 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
316 @*/
317 PetscErrorCode  PetscEnd(void)
318 {
319   PetscFunctionBegin;
320   PetscFinalize();
321   exit(0);
322   return 0;
323 }
324 
325 PetscBool    PetscOptionsPublish = PETSC_FALSE;                  /* set from the -options_gui option in PetscOptionsCheckInitial_Private() */
326 extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);  /* defined elsewhere; called below when one of the malloc tracing options is in effect */
327 extern PetscBool  petscsetmallocvisited;                         /* defined elsewhere; checked below before enabling the default tracing malloc in debug builds */
328 static char       emacsmachinename[256];                         /* receives the -on_error_emacs value (read with a 128-character limit) and is passed as context to PetscEmacsClientErrorHandler */
329 
330 PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;    /* set by PetscSetHelpVersionFunctions(); if non-null, called before the PETSc banner for -v, -version and -help */
331 PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;    /* set by PetscSetHelpVersionFunctions(); if non-null, called for -help */
332 
333 #undef __FUNCT__
334 #define __FUNCT__ "PetscSetHelpVersionFunctions"
335 /*@C
336    PetscSetHelpVersionFunctions - Sets functions that print help and version information
337    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
338    This routine enables a "higher-level" package that uses PETSc to print its messages first.
339 
340    Input Parameter:
341 +  help - the help function (may be PETSC_NULL)
342 -  version - the version function (may be PETSC_NULL)
343 
344    Level: developer
345 
346    Concepts: package help message
347 
348 @*/
349 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
350 {
351   PetscFunctionBegin;
352   PetscExternalHelpFunction    = help;
353   PetscExternalVersionFunction = version;
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
359 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
360 {
361   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
362   MPI_Comm       comm = PETSC_COMM_WORLD;
363   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
364   PetscErrorCode ierr;
365   PetscReal      si;
366   int            i;
367   PetscMPIInt    rank;
368   char           version[256];
369 
370   PetscFunctionBegin;
371   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
372 
373   /*
374       Setup the memory management; support for tracing malloc() usage
375   */
376   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
377 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
378   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
379   if ((!flg2 || flg1) && !petscsetmallocvisited) {
380 #if defined(PETSC_HAVE_VALGRIND)
381     if (flg2 || !(RUNNING_ON_VALGRIND)) {
382       /* turn off default -malloc if valgrind is being used */
383 #endif
384       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
385 #if defined(PETSC_HAVE_VALGRIND)
386     }
387 #endif
388   }
389 #else
390   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
391   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
392   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
393 #endif
394   if (flg3) {
395     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
396   }
397   flg1 = PETSC_FALSE;
398   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
399   if (flg1) {
400     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
401     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
402   }
403 
404   flg1 = PETSC_FALSE;
405   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
406   if (!flg1) {
407     flg1 = PETSC_FALSE;
408     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
409   }
410   if (flg1) {
411     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
412   }
413 
414   /*
415       Set the display variable for graphics
416   */
417   ierr = PetscSetDisplay();CHKERRQ(ierr);
418 
419 
420   /*
421       Print the PETSc version information
422   */
423   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
424   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
425   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
426   if (flg1 || flg2 || flg3){
427 
428     /*
429        Print "higher-level" package version message
430     */
431     if (PetscExternalVersionFunction) {
432       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
433     }
434 
435     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
436     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
437 ------------------------------\n");CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
441     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
442     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
443     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
444     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
445 ------------------------------\n");CHKERRQ(ierr);
446   }
447 
448   /*
449        Print "higher-level" package help message
450   */
451   if (flg3){
452     if (PetscExternalHelpFunction) {
453       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
454     }
455   }
456 
457   /*
458       Setup the error handling
459   */
460   flg1 = PETSC_FALSE;
461   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
462   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
463   flg1 = PETSC_FALSE;
464   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
465   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
466   flg1 = PETSC_FALSE;
467   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
468   if (flg1) {
469     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
470   }
471   flg1 = PETSC_FALSE;
472   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
473   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
474   flg1 = PETSC_FALSE;
475   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
476   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
477 
478   /*
479       Setup debugger information
480   */
481   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
482   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
483   if (flg1) {
484     MPI_Errhandler err_handler;
485 
486     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
487     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
488     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
489     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
490   }
491   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
492   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
493   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
494   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
495   if (flg1 || flg2) {
496     PetscMPIInt    size;
497     PetscInt       lsize,*nodes;
498     MPI_Errhandler err_handler;
499     /*
500        we have to make sure that all processors have opened
501        connections to all other processors, otherwise once the
502        debugger has stated it is likely to receive a SIGUSR1
503        and kill the program.
504     */
505     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
506     if (size > 2) {
507       PetscMPIInt dummy = 0;
508       MPI_Status  status;
509       for (i=0; i<size; i++) {
510         if (rank != i) {
511           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
512         }
513       }
514       for (i=0; i<size; i++) {
515         if (rank != i) {
516           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
517         }
518       }
519     }
520     /* check if this processor node should be in debugger */
521     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
522     lsize = size;
523     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
524     if (flag) {
525       for (i=0; i<lsize; i++) {
526         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
527       }
528     }
529     if (!flag) {
530       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
531       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
532       if (flg1) {
533         ierr = PetscAttachDebugger();CHKERRQ(ierr);
534       } else {
535         ierr = PetscStopForDebugger();CHKERRQ(ierr);
536       }
537       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
538       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
539     }
540     ierr = PetscFree(nodes);CHKERRQ(ierr);
541   }
542 
543   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
544   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
545 
546 #if defined(PETSC_USE_SOCKET_VIEWER)
547   /*
548     Activates new sockets for zope if needed
549   */
550   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
551   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
552   if (flgz){
553     int  sockfd;
554     char hostname[256];
555     char username[256];
556     int  remoteport = 9999;
557 
558     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
559     if (!hostname[0]){
560       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
561     }
562     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
563     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
564     PETSC_ZOPEFD = fdopen(sockfd, "w");
565     if (flgzout){
566       PETSC_STDOUT = PETSC_ZOPEFD;
567       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
568       fprintf(PETSC_STDOUT, "<<<start>>>");
569     } else {
570       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
571       fprintf(PETSC_ZOPEFD, "<<<start>>>");
572     }
573   }
574 #endif
575 #if defined(PETSC_USE_SERVER)
576   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
577   if (flgz){
578     PetscInt port = PETSC_DECIDE;
579     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
580     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
581   }
582 #endif
583 
584   /*
585         Setup profiling and logging
586   */
587 #if defined (PETSC_USE_INFO)
588   {
589     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
590     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
591     if (flg1 && logname[0]) {
592       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
593     } else if (flg1) {
594       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
595     }
596   }
597 #endif
598 #if defined(PETSC_USE_LOG)
599   mname[0] = 0;
600   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
601   if (flg1) {
602     if (mname[0]) {
603       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
604     } else {
605       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
606     }
607   }
608 #if defined(PETSC_HAVE_MPE)
609   flg1 = PETSC_FALSE;
610   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
611   if (flg1) PetscLogMPEBegin();
612 #endif
613   flg1 = PETSC_FALSE;
614   flg2 = PETSC_FALSE;
615   flg3 = PETSC_FALSE;
616   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
617   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
618   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
619   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
620   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
621   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
622 
623   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
624   if (flg1) {
625     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
626     FILE *file;
627     if (mname[0]) {
628       sprintf(name,"%s.%d",mname,rank);
629       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
630       file = fopen(fname,"w");
631       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
632     } else {
633       file = PETSC_STDOUT;
634     }
635     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
636   }
637 #endif
638 
639   /*
640       Setup building of stack frames for all function calls
641   */
642 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
643   ierr = PetscStackCreate();CHKERRQ(ierr);
644 #endif
645 
646   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
647 
648 #if defined(PETSC_HAVE_PTHREADCLASSES)
649   /*
650       Determine whether user specified maximum number of threads
651    */
652   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
653 
654   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
655   if(flg1) {
656     cpu_set_t mset;
657     int icorr,ncorr = get_nprocs();
658     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
659     CPU_ZERO(&mset);
660     CPU_SET(icorr%ncorr,&mset);
661     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
662   }
663 
664   PetscInt N_CORES = get_nprocs();
665   ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
666   char tstr[9];
667   char tbuf[2];
668   strcpy(tstr,"-thread");
669   for(i=0;i<PetscMaxThreads;i++) {
670     ThreadCoreAffinity[i] = i;
671     sprintf(tbuf,"%d",i);
672     strcat(tstr,tbuf);
673     ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
674     if(flg1) {
675       ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
676       ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
677     }
678     tstr[7] = '\0';
679   }
680 
681   /*
682       Determine whether to use thread pool
683    */
684   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
685   if (flg1) {
686     PetscUseThreadPool = PETSC_TRUE;
687     /* get the thread pool type */
688     PetscInt ipool = 0;
689     const char *choices[4] = {"true","tree","main","chain"};
690 
691     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
692     switch(ipool) {
693     case 1:
694       PetscThreadFunc       = &PetscThreadFunc_Tree;
695       PetscThreadInitialize = &PetscThreadInitialize_Tree;
696       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
697       MainWait              = &MainWait_Tree;
698       MainJob               = &MainJob_Tree;
699       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
700       break;
701     case 2:
702       PetscThreadFunc       = &PetscThreadFunc_Main;
703       PetscThreadInitialize = &PetscThreadInitialize_Main;
704       PetscThreadFinalize   = &PetscThreadFinalize_Main;
705       MainWait              = &MainWait_Main;
706       MainJob               = &MainJob_Main;
707       PetscInfo(PETSC_NULL,"Using main thread pool\n");
708       break;
709 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
710     case 3:
711 #else
712     default:
713 #endif
714       PetscThreadFunc       = &PetscThreadFunc_Chain;
715       PetscThreadInitialize = &PetscThreadInitialize_Chain;
716       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
717       MainWait              = &MainWait_Chain;
718       MainJob               = &MainJob_Chain;
719       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
720       break;
721 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
722     default:
723       PetscThreadFunc       = &PetscThreadFunc_True;
724       PetscThreadInitialize = &PetscThreadInitialize_True;
725       PetscThreadFinalize   = &PetscThreadFinalize_True;
726       MainWait              = &MainWait_True;
727       MainJob               = &MainJob_True;
728       PetscInfo(PETSC_NULL,"Using true thread pool\n");
729       break;
730 #endif
731     }
732     PetscThreadInitialize(PetscMaxThreads);
733   } else {
734     //need to define these in the case on 'no threads' or 'thread create/destroy'
735     //could take any of the above versions
736     MainJob               = &MainJob_Spawn;
737   }
738 #endif
739   /*
740        Print basic help message
741   */
742   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
743   if (flg1) {
744     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
777     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
778 #if defined(PETSC_USE_LOG)
779     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
780     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
781     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
782 #if defined(PETSC_HAVE_MPE)
783     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
784 #endif
785     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
786 #endif
787     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
788     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
789     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
790     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
791   }
792 
793   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
794   if (flg1) {
795     ierr = PetscSleep(si);CHKERRQ(ierr);
796   }
797 
798   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
799   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
800   if (f) {
801     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
802   }
803 
804 #if defined(PETSC_HAVE_CUSP)
805   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
806   if (flg3) flg1 = PETSC_TRUE;
807   else flg1 = PETSC_FALSE;
808   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
809   if (flg1) synchronizeCUSP = PETSC_TRUE;
810 #endif
811 
812   PetscFunctionReturn(0);
813 }
814 
815 #if defined(PETSC_HAVE_PTHREADCLASSES)
816 
817 /**** 'Tree' Thread Pool Functions ****/
/*
   PetscThreadFunc_Tree - Body executed by every worker of the 'tree' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this worker's thread id

   Notes:
   Workers are arranged as a tree of arity Mary (2): the subordinates of thread t
   are threads Mary*t+1 .. Mary*t+Mary that are smaller than PetscMaxThreads.
   A worker with no subordinates has PeeOn set.

   Each worker pins itself to core ThreadCoreAffinity[ThreadId], waits until its
   subordinates report ready, reports ready itself (thread 0 reports to the main
   thread through main_cond, every other thread to its boss through cond1array),
   and then loops: sleep on cond2array until woken, wake the subordinates, run
   job_tree.pfunc, wait for the subordinates to finish, report ready again.
   A nonzero result from job_tree.pfunc sets the global ithreaderr to 1.

   The loop ends once PetscThreadGo is false. Always returns NULL.
*/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;
  cpu_set_t mset;
  //printf("Thread %d In Tree Thread Function\n",ThreadId);
  /* bind the calling thread (pid argument 0) to the single core chosen for this thread id */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* PeeOn is true when even the first potential subordinate index is beyond the last thread id */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
             upon return, has the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  /* this lock is still held when the main loop below first calls pthread_cond_wait */
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit:
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait;
       waiting when you don't have to causes problems.
       also need to check the condition to ensure proper handling of spurious wakeups */
    /* NOTE(review): the flags are updated after every return from pthread_cond_wait, so this
       loop body runs at most once per job and a spurious wakeup would be treated as a job start */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
        *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      /* the root records that the job has been picked up */
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
        SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job: with no data array the kernel receives NULL, otherwise this thread's own entry */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      /* record the failure in the shared flag */
      ithreaderr = 1;
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinates, waiting for them to be ready;
           the arrThreadStarted test guards against a subordinate that is still
           marked ready only because it has not yet started this job */
        for(i=1;i<=Mary;i++) {
          SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks;
                 upon return, has the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
        }
        /* your subordinates are now ready */
      }
      /* re-acquire your own lock; it stays held into the next pthread_cond_wait above */
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
937 
938 #undef __FUNCT__
939 #define __FUNCT__ "PetscThreadInitialize_Tree"
940 void* PetscThreadInitialize_Tree(PetscInt N) {
941   PetscInt i,ierr;
942   int status;
943 
944   if(PetscUseThreadPool) {
945     size_t Val1 = (size_t)CACHE_LINE_SIZE;
946     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
947     arrmutex = (char*)memalign(Val1,Val2);
948     arrcond1 = (char*)memalign(Val1,Val2);
949     arrcond2 = (char*)memalign(Val1,Val2);
950     arrstart = (char*)memalign(Val1,Val2);
951     arrready = (char*)memalign(Val1,Val2);
952     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
953     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
954     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
955     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
956     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
957     /* initialize job structure */
958     for(i=0; i<PetscMaxThreads; i++) {
959       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
960       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
961       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
962       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
963       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
964     }
965     for(i=0; i<PetscMaxThreads; i++) {
966       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
967       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
968       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
969       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
970       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
971     }
972     job_tree.pfunc = NULL;
973     job_tree.pdata = (void**)malloc(N*sizeof(void*));
974     job_tree.startJob = PETSC_FALSE;
975     job_tree.eJobStat = JobInitiated;
976     pVal = (int*)malloc(N*sizeof(int));
977     /* allocate memory in the heap for the thread structure */
978     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
979     /* create threads */
980     for(i=0; i<N; i++) {
981       pVal[i] = i;
982       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
983       /* should check status */
984     }
985   }
986   return NULL;
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "PetscThreadFinalize_Tree"
991 PetscErrorCode PetscThreadFinalize_Tree() {
992   int i,ierr;
993   void* jstatus;
994 
995   PetscFunctionBegin;
996 
997   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
998   /* join the threads */
999   for(i=0; i<PetscMaxThreads; i++) {
1000     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1001     /* do error checking*/
1002   }
1003   free(PetscThreadPoint);
1004   free(arrmutex);
1005   free(arrcond1);
1006   free(arrcond2);
1007   free(arrstart);
1008   free(arrready);
1009   free(job_tree.pdata);
1010   free(pVal);
1011 
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MainWait_Tree"
1017 void MainWait_Tree() {
1018   int ierr;
1019   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1020   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1021     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1022   }
1023   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1024 }
1025 
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MainJob_Tree"
1028 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1029   int i,ierr;
1030   PetscErrorCode ijoberr = 0;
1031 
1032   MainWait();
1033   job_tree.pfunc = pFunc;
1034   job_tree.pdata = data;
1035   job_tree.startJob = PETSC_TRUE;
1036   for(i=0; i<PetscMaxThreads; i++) {
1037     *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1038   }
1039   job_tree.eJobStat = JobInitiated;
1040   ierr = pthread_cond_signal(job_tree.cond2array[0]);
1041   if(pFunc!=FuncFinish) {
1042     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1043   }
1044 
1045   if(ithreaderr) {
1046     ijoberr = ithreaderr;
1047   }
1048   return ijoberr;
1049 }
1050 /****  ****/
1051 
1052 /**** 'Main' Thread Pool Functions ****/
/*
   PetscThreadFunc_Main - Body executed by every worker of the 'main' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this worker's thread id

   Notes:
   Each worker pins itself to core ThreadCoreAffinity[ThreadId], marks itself
   ready and signals cond1array[ThreadId], then loops: sleep on
   cond2array[ThreadId] while its ready flag is set, run job_main.pfunc, mark
   itself ready again and signal cond1array[ThreadId].
   The ready flag is not cleared in this function; MainJob_Main() clears it
   before signalling cond2array.
   A nonzero result from job_main.pfunc sets the global ithreaderr to 1.

   The loop ends once PetscThreadGo is false. Always returns NULL.
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;
  //printf("Thread %d In Main Thread Function\n",ThreadId);
  /* bind the calling thread (pid argument 0) to the single core chosen for this thread id */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* this lock is still held when the loop below first calls pthread_cond_wait */
  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* the while loop needs to have an exit:
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait;
       waiting when you don't have to causes problems.
       also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
        /* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* run the job: with no data array the kernel receives NULL, otherwise this thread's own entry */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      /* record the failure in the shared flag */
      ithreaderr = 1;
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more; the lock taken here stays held into the next wait */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "PetscThreadInitialize_Main"
1107 void* PetscThreadInitialize_Main(PetscInt N) {
1108   PetscInt i,ierr;
1109   int status;
1110 
1111   if(PetscUseThreadPool) {
1112     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1113     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1114     arrmutex = (char*)memalign(Val1,Val2);
1115     arrcond1 = (char*)memalign(Val1,Val2);
1116     arrcond2 = (char*)memalign(Val1,Val2);
1117     arrstart = (char*)memalign(Val1,Val2);
1118     arrready = (char*)memalign(Val1,Val2);
1119     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1120     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1121     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1122     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1123     /* initialize job structure */
1124     for(i=0; i<PetscMaxThreads; i++) {
1125       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1126       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1127       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1128       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1129     }
1130     for(i=0; i<PetscMaxThreads; i++) {
1131       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1132       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1133       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1134       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1135     }
1136     job_main.pfunc = NULL;
1137     job_main.pdata = (void**)malloc(N*sizeof(void*));
1138     pVal = (int*)malloc(N*sizeof(int));
1139     /* allocate memory in the heap for the thread structure */
1140     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1141     /* create threads */
1142     for(i=0; i<N; i++) {
1143       pVal[i] = i;
1144       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1145       /* error check */
1146     }
1147   }
1148   else {
1149   }
1150   return NULL;
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "PetscThreadFinalize_Main"
1155 PetscErrorCode PetscThreadFinalize_Main() {
1156   int i,ierr;
1157   void* jstatus;
1158 
1159   PetscFunctionBegin;
1160 
1161   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1162   /* join the threads */
1163   for(i=0; i<PetscMaxThreads; i++) {
1164     ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1165   }
1166   free(PetscThreadPoint);
1167   free(arrmutex);
1168   free(arrcond1);
1169   free(arrcond2);
1170   free(arrstart);
1171   free(arrready);
1172   free(job_main.pdata);
1173   free(pVal);
1174 
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MainWait_Main"
1180 void MainWait_Main() {
1181   int i,ierr;
1182   for(i=0; i<PetscMaxThreads; i++) {
1183     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1184     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1185       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1186     }
1187     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1188   }
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MainJob_Main"
1193 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1194   int i,ierr;
1195   PetscErrorCode ijoberr = 0;
1196 
1197   MainWait(); /* you know everyone is waiting to be signalled! */
1198   job_main.pfunc = pFunc;
1199   job_main.pdata = data;
1200   for(i=0; i<PetscMaxThreads; i++) {
1201     *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1202   }
1203   /* tell the threads to go to work */
1204   for(i=0; i<PetscMaxThreads; i++) {
1205     ierr = pthread_cond_signal(job_main.cond2array[i]);
1206   }
1207   if(pFunc!=FuncFinish) {
1208     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1209   }
1210 
1211   if(ithreaderr) {
1212     ijoberr = ithreaderr;
1213   }
1214   return ijoberr;
1215 }
1216 /****  ****/
1217 
1218 /**** Chain Thread Functions ****/
/*
   PetscThreadFunc_Chain - Body executed by every worker of the 'chain' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this worker's thread id

   Notes:
   Workers form a chain: the subordinate of thread t is thread t+1, and the
   thread with id PetscMaxThreads-1 has none (PeeOn set).

   Each worker pins itself to core ThreadCoreAffinity[ThreadId], waits until its
   subordinate reports ready, reports ready itself (thread 0 reports to the main
   thread through main_cond, every other thread to its boss through cond1array),
   and then loops: sleep on cond2array until woken, wake the subordinate, run
   job_chain.pfunc, wait for the subordinate to finish, report ready again.
   A nonzero result from job_chain.pfunc sets the global ithreaderr to 1.

   The loop ends once PetscThreadGo is false. Always returns NULL.
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;
  cpu_set_t mset;
  //printf("Thread %d In Chain Thread Function\n",ThreadId);
  /* bind the calling thread (pid argument 0) to the single core chosen for this thread id */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* the last thread in the chain has nobody below it */
  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready */
  }
  /* this lock is still held when the main loop below first calls pthread_cond_wait */
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit:
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait;
       waiting when you don't have to causes problems.
       also need to check the condition to ensure proper handling of spurious wakeups */
    /* NOTE(review): the flags are updated after every return from pthread_cond_wait, so this
       loop body runs at most once per job and a spurious wakeup would be treated as a job start */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
        *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      /* the head of the chain records that the job has been picked up */
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job: with no data array the kernel receives NULL, otherwise this thread's own entry */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      /* record the failure in the shared flag */
      ithreaderr = 1;
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinate, wait for him to be ready;
           the arrThreadStarted test guards against a subordinate that is still
           marked ready only because it has not yet started this job */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
             upon return, has the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      /* re-acquire your own lock; it stays held into the next pthread_cond_wait above */
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "PetscThreadInitialize_Chain"
1327 void* PetscThreadInitialize_Chain(PetscInt N) {
1328   PetscInt i,ierr;
1329   int status;
1330 
1331   if(PetscUseThreadPool) {
1332     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1333     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1334     arrmutex = (char*)memalign(Val1,Val2);
1335     arrcond1 = (char*)memalign(Val1,Val2);
1336     arrcond2 = (char*)memalign(Val1,Val2);
1337     arrstart = (char*)memalign(Val1,Val2);
1338     arrready = (char*)memalign(Val1,Val2);
1339     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1340     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1341     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1342     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1343     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1344     /* initialize job structure */
1345     for(i=0; i<PetscMaxThreads; i++) {
1346       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1347       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1348       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1349       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1350       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1351     }
1352     for(i=0; i<PetscMaxThreads; i++) {
1353       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1354       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1355       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1356       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1357       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1358     }
1359     job_chain.pfunc = NULL;
1360     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1361     job_chain.startJob = PETSC_FALSE;
1362     job_chain.eJobStat = JobInitiated;
1363     pVal = (int*)malloc(N*sizeof(int));
1364     /* allocate memory in the heap for the thread structure */
1365     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1366     /* create threads */
1367     for(i=0; i<N; i++) {
1368       pVal[i] = i;
1369       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1370       /* should check error */
1371     }
1372   }
1373   else {
1374   }
1375   return NULL;
1376 }
1377 
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "PetscThreadFinalize_Chain"
1381 PetscErrorCode PetscThreadFinalize_Chain() {
1382   int i,ierr;
1383   void* jstatus;
1384 
1385   PetscFunctionBegin;
1386 
1387   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1388   /* join the threads */
1389   for(i=0; i<PetscMaxThreads; i++) {
1390     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1391     /* should check error */
1392   }
1393   free(PetscThreadPoint);
1394   free(arrmutex);
1395   free(arrcond1);
1396   free(arrcond2);
1397   free(arrstart);
1398   free(arrready);
1399   free(job_chain.pdata);
1400   free(pVal);
1401 
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MainWait_Chain"
1407 void MainWait_Chain() {
1408   int ierr;
1409   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1410   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1411     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1412   }
1413   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MainJob_Chain"
1418 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1419   int i,ierr;
1420   PetscErrorCode ijoberr = 0;
1421 
1422   MainWait();
1423   job_chain.pfunc = pFunc;
1424   job_chain.pdata = data;
1425   job_chain.startJob = PETSC_TRUE;
1426   for(i=0; i<PetscMaxThreads; i++) {
1427     *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1428   }
1429   job_chain.eJobStat = JobInitiated;
1430   ierr = pthread_cond_signal(job_chain.cond2array[0]);
1431   if(pFunc!=FuncFinish) {
1432     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1433   }
1434 
1435   if(ithreaderr) {
1436     ijoberr = ithreaderr;
1437   }
1438   return ijoberr;
1439 }
1440 /****  ****/
1441 
1442 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1443 /**** True Thread Functions ****/
/*
   PetscThreadFunc_True - Body executed by every worker of the 'true' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this worker's thread id (used only to pick the core)

   Notes:
   All workers share one mutex (job_true.mutex) and one condition variable
   (job_true.cond). A worker pins itself to core ThreadCoreAffinity[ThreadId],
   counts itself in job_true.iNumReadyThreads and then loops: sleep until a job
   is posted, claim one slot of the job, run job_true.pfunc, meet the other
   participants at the barrier job_true.pbarr and count itself ready again.
   Whichever worker brings iNumReadyThreads up to PetscMaxThreads signals main_cond.
   A nonzero result from job_true.pfunc sets the global ithreaderr to 1.

   The loop ends once PetscThreadGo is false. Always returns NULL.
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;
  //printf("Thread %d In True Pool Thread Function\n",ThreadId);
  /* bind the calling thread (pid argument 0) to the single core chosen for this thread id */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* this lock is still held when the loop below first calls pthread_cond_wait */
  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    /* the last worker to arrive tells 'main' that the pool is ready */
    ierr = pthread_cond_signal(&main_cond);
  }
  /* the while loop needs to have an exit:
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait;
       waiting when you don't have to causes problems.
       also need to wait if another thread sneaks in and messes with the predicate */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
      //printf("Thread %d Going to Sleep!\n",ThreadId);
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    /* claim one slot of the job while still holding the lock */
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* index into pdata derived from how many workers have already left the ready
       set: 0 for the first claimant if all PetscMaxThreads workers were ready */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* run the job: with no data array the kernel receives NULL, otherwise the claimed entry */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      /* record the failure in the shared flag */
      ithreaderr = 1;
    }
    //printf("Thread %d Finished Job\n",ThreadId);
    /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
       what happens if a thread finishes before they all start? BAD!
       what happens if a thread finishes before any else start? BAD! */
    pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
    /* reset job */
    if(PetscThreadGo) {
      /* the lock taken here stays held into the next pthread_cond_wait above */
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
        /* signal the 'main' thread that the job is done! (only done once) */
        ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "PetscThreadInitialize_True"
1508 void* PetscThreadInitialize_True(PetscInt N) {
1509   PetscInt i;
1510   int status;
1511 
1512   pVal = (int*)malloc(N*sizeof(int));
1513   /* allocate memory in the heap for the thread structure */
1514   PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1515   BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1516   job_true.pdata = (void**)malloc(N*sizeof(void*));
1517   for(i=0; i<N; i++) {
1518     pVal[i] = i;
1519     status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1520     /* error check to ensure proper thread creation */
1521     status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1522     /* should check error */
1523   }
1524   //printf("Finished True Thread Pool Initialization\n");
1525   return NULL;
1526 }
1527 
1528 
1529 #undef __FUNCT__
1530 #define __FUNCT__ "PetscThreadFinalize_True"
1531 PetscErrorCode PetscThreadFinalize_True() {
1532   int i,ierr;
1533   void* jstatus;
1534 
1535   PetscFunctionBegin;
1536 
1537   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1538   /* join the threads */
1539   for(i=0; i<PetscMaxThreads; i++) {
1540     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1541   }
1542   free(BarrPoint);
1543   free(PetscThreadPoint);
1544 
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MainWait_True"
1550 void MainWait_True() {
1551   int ierr;
1552   ierr = pthread_mutex_lock(&job_true.mutex);
1553   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1554     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1555   }
1556   ierr = pthread_mutex_unlock(&job_true.mutex);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MainJob_True"
1561 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1562   int ierr;
1563   PetscErrorCode ijoberr = 0;
1564 
1565   MainWait();
1566   job_true.pfunc = pFunc;
1567   job_true.pdata = data;
1568   job_true.pbarr = &BarrPoint[n];
1569   job_true.iNumJobThreads = n;
1570   job_true.startJob = PETSC_TRUE;
1571   ierr = pthread_cond_broadcast(&job_true.cond);
1572   if(pFunc!=FuncFinish) {
1573     MainWait(); /* why wait after? guarantees that job gets done */
1574   }
1575 
1576   if(ithreaderr) {
1577     ijoberr = ithreaderr;
1578   }
1579   return ijoberr;
1580 }
1581 /**** NO THREAD POOL FUNCTION ****/
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MainJob_Spawn"
1584 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1585   PetscErrorCode ijoberr = 0;
1586 
1587   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1588   PetscThreadPoint = apThread; /* point to same place */
1589   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1590   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1591   free(apThread);
1592 
1593   return ijoberr;
1594 }
1595 /****  ****/
1596 #endif
1597 
1598 void* FuncFinish(void* arg) {
1599   PetscThreadGo = PETSC_FALSE;
1600   return(0);
1601 }
1602 
1603 #endif
1604