xref: /petsc/src/sys/objects/init.c (revision 5337c6f8edd8406cf53e5b392fbb7e258778b84a)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #include <petscsys.h>        /*I  "petscsys.h"   I*/
10 #if defined(PETSC_HAVE_SCHED_H) && defined(PETSC_USE_PTHREAD)
11 #ifndef _GNU_SOURCE
12 #define _GNU_SOURCE
13 #endif
14 #include <sched.h>
15 #endif
16 #if defined(PETSC_USE_PTHREAD)
17 #include <pthread.h>
18 #endif
19 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
20 #include <sys/sysinfo.h>
21 #endif
22 #include <unistd.h>
23 #if defined(PETSC_HAVE_STDLIB_H)
24 #include <stdlib.h>
25 #endif
26 #if defined(PETSC_HAVE_MALLOC_H)
27 #include <malloc.h>
28 #endif
29 #if defined(PETSC_HAVE_VALGRIND)
30 #include <valgrind/valgrind.h>
31 #endif
32 
33 /* ------------------------Nasty global variables -------------------------------*/
34 /*
35      Indicates if PETSc started up MPI, or it was
36    already started before PETSc was initialized.
37 */
38 PetscBool    PetscBeganMPI         = PETSC_FALSE;
39 PetscBool    PetscInitializeCalled = PETSC_FALSE;
40 PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
41 PetscBool    PetscUseThreadPool    = PETSC_FALSE;
42 PetscBool    PetscThreadGo         = PETSC_TRUE;
43 PetscMPIInt  PetscGlobalRank = -1;
44 PetscMPIInt  PetscGlobalSize = -1;
45 
46 #if defined(PETSC_USE_PTHREAD_CLASSES)
47 PetscMPIInt  PetscMaxThreads = 2;
48 pthread_t*   PetscThreadPoint;
49 #define PETSC_HAVE_PTHREAD_BARRIER
50 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
51 pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
52 #endif
53 PetscErrorCode ithreaderr = 0;
54 int*         pVal;
55 
56 #define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
57 int* ThreadCoreAffinity;
58 
59 typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */
60 
61 typedef struct {
62   pthread_mutex_t** mutexarray;
63   pthread_cond_t**  cond1array;
64   pthread_cond_t** cond2array;
65   void* (*pfunc)(void*);
66   void** pdata;
67   PetscBool startJob;
68   estat eJobStat;
69   PetscBool** arrThreadStarted;
70   PetscBool** arrThreadReady;
71 } sjob_tree;
72 sjob_tree job_tree;
73 typedef struct {
74   pthread_mutex_t** mutexarray;
75   pthread_cond_t**  cond1array;
76   pthread_cond_t** cond2array;
77   void* (*pfunc)(void*);
78   void** pdata;
79   PetscBool** arrThreadReady;
80 } sjob_main;
81 sjob_main job_main;
82 typedef struct {
83   pthread_mutex_t** mutexarray;
84   pthread_cond_t**  cond1array;
85   pthread_cond_t** cond2array;
86   void* (*pfunc)(void*);
87   void** pdata;
88   PetscBool startJob;
89   estat eJobStat;
90   PetscBool** arrThreadStarted;
91   PetscBool** arrThreadReady;
92 } sjob_chain;
93 sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/*
   Job record for the 'true' thread pool. It holds one mutex and one condition
   variable by value (the other pools hold arrays of pointers) plus a barrier
   pointer.

   The member order must not change: job_true below is initialized positionally.

   NOTE(review): the routines that use these fields are not in this part of the
   file; the per-field notes are inferred from names and types only.
*/
typedef struct {
  pthread_mutex_t   mutex;
  pthread_cond_t    cond;
  void              *(*pfunc)(void *);  /* function with the pthread start-routine signature */
  void              **pdata;            /* array of untyped pointers - presumably arguments for pfunc */
  pthread_barrier_t *pbarr;
  int               iNumJobThreads;
  int               iNumReadyThreads;
  PetscBool         startJob;
} sjob_true;

/* Statically initialized: default mutex and condition variable, null pointers, zero counters, startJob off */
sjob_true job_true = {
  PTHREAD_MUTEX_INITIALIZER,
  PTHREAD_COND_INITIALIZER,
  NULL,
  NULL,
  NULL,
  0,
  0,
  PETSC_FALSE
};
#endif
107 
/* Statically initialized condition variable; used by 'true', 'chain','tree' thread pools */
pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER;

/*
   Raw byte buffers used by 'chain','main','tree' thread pools.
   NOTE(review): presumably the backing storage for the mutex / cond1 / cond2 /
   started / ready arrays in the job records; the allocation code is not in
   this part of the file - confirm.
*/
char *arrmutex;
char *arrcond1;
char *arrcond2;
char *arrstart;
char *arrready;
114 
115 /* Function Pointers */
116 void*          (*PetscThreadFunc)(void*) = NULL;
117 void*          (*PetscThreadInitialize)(PetscInt) = NULL;
118 PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
119 void           (*MainWait)(void) = NULL;
120 PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
121 /**** Tree Thread Pool Functions ****/
122 void*          PetscThreadFunc_Tree(void*);
123 void*          PetscThreadInitialize_Tree(PetscInt);
124 PetscErrorCode PetscThreadFinalize_Tree(void);
125 void           MainWait_Tree(void);
126 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
127 /**** Main Thread Pool Functions ****/
128 void*          PetscThreadFunc_Main(void*);
129 void*          PetscThreadInitialize_Main(PetscInt);
130 PetscErrorCode PetscThreadFinalize_Main(void);
131 void           MainWait_Main(void);
132 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
133 /**** Chain Thread Pool Functions ****/
134 void*          PetscThreadFunc_Chain(void*);
135 void*          PetscThreadInitialize_Chain(PetscInt);
136 PetscErrorCode PetscThreadFinalize_Chain(void);
137 void           MainWait_Chain(void);
138 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
139 /**** True Thread Pool Functions ****/
140 void*          PetscThreadFunc_True(void*);
141 void*          PetscThreadInitialize_True(PetscInt);
142 PetscErrorCode PetscThreadFinalize_True(void);
143 void           MainWait_True(void);
144 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
145 /**** NO Thread Pool Function  ****/
146 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);
147 /****  ****/
148 void* FuncFinish(void*);
149 void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
150 void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
151 #endif
152 
153 #if defined(PETSC_USE_COMPLEX)
154 #if defined(PETSC_COMPLEX_INSTANTIATE)
155 template <> class std::complex<double>; /* instantiate complex template class */
156 #endif
157 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
158 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
159 MPI_Datatype   MPI_C_COMPLEX;
160 #endif
161 PetscScalar    PETSC_i;
162 #else
163 PetscScalar    PETSC_i = 0.0;
164 #endif
165 #if defined(PETSC_USE_REAL___FLOAT128)
166 MPI_Datatype   MPIU___FLOAT128 = 0;
167 #endif
168 MPI_Datatype   MPIU_2SCALAR = 0;
169 MPI_Datatype   MPIU_2INT = 0;
170 
171 /*
172      These are needed by petscbt.h
173 */
174 #include <petscbt.h>
175 char      _BT_mask = ' ';
176 char      _BT_c = ' ';
177 PetscInt  _BT_idx  = 0;
178 
179 /*
180        Function that is called to display all error messages
181 */
182 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
183 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
184 #if defined(PETSC_HAVE_MATLAB_ENGINE)
185 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
186 #else
187 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
188 #endif
189 /*
190   This is needed to turn on/off cusp synchronization */
191 PetscBool   synchronizeCUSP = PETSC_FALSE;
192 
193 /* ------------------------------------------------------------------------------*/
194 /*
195    Optional file where all PETSc output from various prints is saved
196 */
197 FILE *petsc_history = PETSC_NULL;
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "PetscOpenHistoryFile"
201 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
202 {
203   PetscErrorCode ierr;
204   PetscMPIInt    rank,size;
205   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
206   char           version[256];
207 
208   PetscFunctionBegin;
209   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
210   if (!rank) {
211     char        arch[10];
212     int         err;
213     PetscViewer viewer;
214 
215     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
216     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
217     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
218     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
219     if (filename) {
220       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
221     } else {
222       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
223       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
224       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
225     }
226 
227     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
228     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
229     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
230     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
231     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
232     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
233     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
234     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
235     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
236     err = fflush(*fd);
237     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "PetscCloseHistoryFile"
244 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
245 {
246   PetscErrorCode ierr;
247   PetscMPIInt    rank;
248   char           date[64];
249   int            err;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
253   if (!rank) {
254     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
255     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
256     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
257     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
258     err = fflush(*fd);
259     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
260     err = fclose(*fd);
261     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
262   }
263   PetscFunctionReturn(0);
264 }
265 
266 /* ------------------------------------------------------------------------------*/
267 
/*
   This is ugly and probably belongs somewhere else, but I want to
  be able to install a true MPI abort error handler from the command line.

    This is so that MPI errors in the debugger leave all the stack frames
  in place. The default MPI_Abort() cleans up and exits, thus providing no useful
  information in the debugger; hence we call abort() instead of MPI_Abort().
*/
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "Petsc_MPI_AbortOnError"
279 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
280 {
281   PetscFunctionBegin;
282   (*PetscErrorPrintf)("MPI error %d\n",*flag);
283   abort();
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
288 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
289 {
290   PetscErrorCode ierr;
291 
292   PetscFunctionBegin;
293   (*PetscErrorPrintf)("MPI error %d\n",*flag);
294   ierr = PetscAttachDebugger();
295   if (ierr) { /* hopeless so get out */
296     MPI_Abort(*comm,*flag);
297   }
298 }
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "PetscEnd"
302 /*@C
303    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
304      wishes a clean exit somewhere deep in the program.
305 
306    Collective on PETSC_COMM_WORLD
307 
308    Options Database Keys are the same as for PetscFinalize()
309 
310    Level: advanced
311 
312    Note:
313    See PetscInitialize() for more general runtime options.
314 
315 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
316 @*/
317 PetscErrorCode  PetscEnd(void)
318 {
319   PetscFunctionBegin;
320   PetscFinalize();
321   exit(0);
322   return 0;
323 }
324 
325 PetscBool    PetscOptionsPublish = PETSC_FALSE;
326 extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
327 extern PetscBool  petscsetmallocvisited;
328 static char       emacsmachinename[256];
329 
330 PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
331 PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
332 
333 #undef __FUNCT__
334 #define __FUNCT__ "PetscSetHelpVersionFunctions"
335 /*@C
336    PetscSetHelpVersionFunctions - Sets functions that print help and version information
337    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
338    This routine enables a "higher-level" package that uses PETSc to print its messages first.
339 
340    Input Parameter:
341 +  help - the help function (may be PETSC_NULL)
342 -  version - the version function (may be PETSC_NULL)
343 
344    Level: developer
345 
346    Concepts: package help message
347 
348 @*/
349 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
350 {
351   PetscFunctionBegin;
352   PetscExternalHelpFunction    = help;
353   PetscExternalVersionFunction = version;
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
359 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
360 {
361   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
362   MPI_Comm       comm = PETSC_COMM_WORLD;
363   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
364   PetscErrorCode ierr;
365   PetscReal      si;
366   int            i;
367   PetscMPIInt    rank;
368   char           version[256];
369 
370   PetscFunctionBegin;
371   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
372 
373   /*
374       Setup the memory management; support for tracing malloc() usage
375   */
376   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
377 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
378   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
379   if ((!flg2 || flg1) && !petscsetmallocvisited) {
380 #if defined(PETSC_HAVE_VALGRIND)
381     if (flg2 || !(RUNNING_ON_VALGRIND)) {
382       /* turn off default -malloc if valgrind is being used */
383 #endif
384       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
385 #if defined(PETSC_HAVE_VALGRIND)
386     }
387 #endif
388   }
389 #else
390   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
391   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
392   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
393 #endif
394   if (flg3) {
395     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
396   }
397   flg1 = PETSC_FALSE;
398   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
399   if (flg1) {
400     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
401     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
402   }
403 
404   flg1 = PETSC_FALSE;
405   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
406   if (!flg1) {
407     flg1 = PETSC_FALSE;
408     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
409   }
410   if (flg1) {
411     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
412   }
413 
414   /*
415       Set the display variable for graphics
416   */
417   ierr = PetscSetDisplay();CHKERRQ(ierr);
418 
419 
420   /*
421       Print the PETSc version information
422   */
423   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
424   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
425   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
426   if (flg1 || flg2 || flg3){
427 
428     /*
429        Print "higher-level" package version message
430     */
431     if (PetscExternalVersionFunction) {
432       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
433     }
434 
435     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
436     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
437 ------------------------------\n");CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
441     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
442     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
443     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
444     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
445 ------------------------------\n");CHKERRQ(ierr);
446   }
447 
448   /*
449        Print "higher-level" package help message
450   */
451   if (flg3){
452     if (PetscExternalHelpFunction) {
453       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
454     }
455   }
456 
457   /*
458       Setup the error handling
459   */
460   flg1 = PETSC_FALSE;
461   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
462   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
463   flg1 = PETSC_FALSE;
464   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
465   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
466   flg1 = PETSC_FALSE;
467   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
468   if (flg1) {
469     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
470   }
471   flg1 = PETSC_FALSE;
472   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
473   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
474   flg1 = PETSC_FALSE;
475   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
476   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
477 
478   /*
479       Setup debugger information
480   */
481   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
482   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
483   if (flg1) {
484     MPI_Errhandler err_handler;
485 
486     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
487     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
488     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
489     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
490   }
491   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
492   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
493   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
494   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
495   if (flg1 || flg2) {
496     PetscMPIInt    size;
497     PetscInt       lsize,*nodes;
498     MPI_Errhandler err_handler;
499     /*
500        we have to make sure that all processors have opened
501        connections to all other processors, otherwise once the
502        debugger has stated it is likely to receive a SIGUSR1
503        and kill the program.
504     */
505     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
506     if (size > 2) {
507       PetscMPIInt dummy = 0;
508       MPI_Status  status;
509       for (i=0; i<size; i++) {
510         if (rank != i) {
511           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
512         }
513       }
514       for (i=0; i<size; i++) {
515         if (rank != i) {
516           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
517         }
518       }
519     }
520     /* check if this processor node should be in debugger */
521     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
522     lsize = size;
523     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
524     if (flag) {
525       for (i=0; i<lsize; i++) {
526         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
527       }
528     }
529     if (!flag) {
530       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
531       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
532       if (flg1) {
533         ierr = PetscAttachDebugger();CHKERRQ(ierr);
534       } else {
535         ierr = PetscStopForDebugger();CHKERRQ(ierr);
536       }
537       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
538       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
539     }
540     ierr = PetscFree(nodes);CHKERRQ(ierr);
541   }
542 
543   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
544   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
545 
546 #if defined(PETSC_USE_SOCKET_VIEWER)
547   /*
548     Activates new sockets for zope if needed
549   */
550   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
551   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
552   if (flgz){
553     int  sockfd;
554     char hostname[256];
555     char username[256];
556     int  remoteport = 9999;
557 
558     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
559     if (!hostname[0]){
560       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
561     }
562     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
563     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
564     PETSC_ZOPEFD = fdopen(sockfd, "w");
565     if (flgzout){
566       PETSC_STDOUT = PETSC_ZOPEFD;
567       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
568       fprintf(PETSC_STDOUT, "<<<start>>>");
569     } else {
570       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
571       fprintf(PETSC_ZOPEFD, "<<<start>>>");
572     }
573   }
574 #endif
575 #if defined(PETSC_USE_SERVER)
576   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
577   if (flgz){
578     PetscInt port = PETSC_DECIDE;
579     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
580     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
581   }
582 #endif
583 
584   /*
585         Setup profiling and logging
586   */
587 #if defined (PETSC_USE_INFO)
588   {
589     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
590     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
591     if (flg1 && logname[0]) {
592       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
593     } else if (flg1) {
594       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
595     }
596   }
597 #endif
598 #if defined(PETSC_USE_LOG)
599   mname[0] = 0;
600   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
601   if (flg1) {
602     if (mname[0]) {
603       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
604     } else {
605       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
606     }
607   }
608 #if defined(PETSC_HAVE_MPE)
609   flg1 = PETSC_FALSE;
610   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
611   if (flg1) PetscLogMPEBegin();
612 #endif
613   flg1 = PETSC_FALSE;
614   flg2 = PETSC_FALSE;
615   flg3 = PETSC_FALSE;
616   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
617   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
618   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
619   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
620   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
621   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
622 
623   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
624   if (flg1) {
625     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
626     FILE *file;
627     if (mname[0]) {
628       sprintf(name,"%s.%d",mname,rank);
629       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
630       file = fopen(fname,"w");
631       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
632     } else {
633       file = PETSC_STDOUT;
634     }
635     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
636   }
637 #endif
638 
639   /*
640       Setup building of stack frames for all function calls
641   */
642 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
643   ierr = PetscStackCreate();CHKERRQ(ierr);
644 #endif
645 
646   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
647 
648 #if defined(PETSC_USE_PTHREAD_CLASSES)
649   /*
650       Determine whether user specified maximum number of threads
651    */
652   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
653 
654   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
655   if(flg1) {
656     cpu_set_t mset;
657     int icorr,ncorr = get_nprocs();
658     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
659     CPU_ZERO(&mset);
660     CPU_SET(icorr%ncorr,&mset);
661     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
662   }
663 
664   /*
665       Determine whether to use thread pool
666    */
667   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
668   if (flg1) {
669     PetscUseThreadPool = PETSC_TRUE;
670     PetscInt N_CORES = get_nprocs();
671     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
672     char tstr[9];
673     char tbuf[2];
674     strcpy(tstr,"-thread");
675     for(i=0;i<PetscMaxThreads;i++) {
676       ThreadCoreAffinity[i] = i;
677       sprintf(tbuf,"%d",i);
678       strcat(tstr,tbuf);
679       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
680       if(flg1) {
681         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
682         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
683       }
684       tstr[7] = '\0';
685     }
686     /* get the thread pool type */
687     PetscInt ipool = 0;
688     const char *choices[4] = {"true","tree","main","chain"};
689 
690     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
691     switch(ipool) {
692     case 1:
693       PetscThreadFunc       = &PetscThreadFunc_Tree;
694       PetscThreadInitialize = &PetscThreadInitialize_Tree;
695       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
696       MainWait              = &MainWait_Tree;
697       MainJob               = &MainJob_Tree;
698       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
699       break;
700     case 2:
701       PetscThreadFunc       = &PetscThreadFunc_Main;
702       PetscThreadInitialize = &PetscThreadInitialize_Main;
703       PetscThreadFinalize   = &PetscThreadFinalize_Main;
704       MainWait              = &MainWait_Main;
705       MainJob               = &MainJob_Main;
706       PetscInfo(PETSC_NULL,"Using main thread pool\n");
707       break;
708 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
709     case 3:
710 #else
711     default:
712 #endif
713       PetscThreadFunc       = &PetscThreadFunc_Chain;
714       PetscThreadInitialize = &PetscThreadInitialize_Chain;
715       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
716       MainWait              = &MainWait_Chain;
717       MainJob               = &MainJob_Chain;
718       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
719       break;
720 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
721     default:
722       PetscThreadFunc       = &PetscThreadFunc_True;
723       PetscThreadInitialize = &PetscThreadInitialize_True;
724       PetscThreadFinalize   = &PetscThreadFinalize_True;
725       MainWait              = &MainWait_True;
726       MainJob               = &MainJob_True;
727       PetscInfo(PETSC_NULL,"Using true thread pool\n");
728       break;
729 #endif
730     }
731     PetscThreadInitialize(PetscMaxThreads);
732   } else {
733     //need to define these in the case on 'no threads' or 'thread create/destroy'
734     //could take any of the above versions
735     MainJob               = &MainJob_Spawn;
736   }
737 #endif
738   /*
739        Print basic help message
740   */
741   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
742   if (flg1) {
743     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
777 #if defined(PETSC_USE_LOG)
778     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
779     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
780     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
781 #if defined(PETSC_HAVE_MPE)
782     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
783 #endif
784     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
785 #endif
786     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
787     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
788     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
789     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
790   }
791 
792   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
793   if (flg1) {
794     ierr = PetscSleep(si);CHKERRQ(ierr);
795   }
796 
797   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
798   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
799   if (f) {
800     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
801   }
802 
803 #if defined(PETSC_HAVE_CUSP)
804   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
805   if (flg3) flg1 = PETSC_TRUE;
806   else flg1 = PETSC_FALSE;
807   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
808   if (flg1) synchronizeCUSP = PETSC_TRUE;
809 #endif
810 
811   PetscFunctionReturn(0);
812 }
813 
814 #if defined(PETSC_USE_PTHREAD_CLASSES)
815 
816 /**** 'Tree' Thread Pool Functions ****/
/*
   PetscThreadFunc_Tree - Worker loop executed by each thread of the 'tree' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this thread's id (ThreadId)

   Notes:
   The workers are arranged as a tree with branching factor Mary (2): the subordinates
   of thread ThreadId are the ids Mary*ThreadId+1 .. Mary*ThreadId+Mary that are smaller
   than PetscMaxThreads.  A thread reports itself ready only after each of its
   subordinates has done so; thread 0 additionally sets job_tree.eJobStat to JobCompleted
   and signals main_cond.  Each pass of the outer loop sleeps on cond2array[ThreadId],
   signals the subordinates, runs job_tree.pfunc and reports ready again.

   Always returns NULL.  A nonzero result from job_tree.pfunc is recorded by setting the
   global ithreaderr to 1.  Return codes of the pthread calls are stored in ierr but never
   examined.
*/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;
  cpu_set_t mset;
  /* printf("Thread %d In Tree Thread Function\n",ThreadId); */
  /* request affinity to the single core listed for this thread */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* PeeOn is set when even the first subordinate id lies outside the pool,
     i.e. this thread has no subordinates */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks
           upon return, has the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  /* this thread's own mutex stays locked from here until the pthread_cond_wait() below releases it */
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit
  the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    /* NOTE(review): the ready flag is cleared right after pthread_cond_wait() returns, so
       any return from the wait - including a spurious one - ends this loop; confirm that
       this is intended */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
	SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job: with no data array the function receives NULL,
       otherwise it receives this thread's entry of the array */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinates, waiting for them to be ready
         how do you know for a fact that a given subordinate has actually started? */
	for(i=1;i<=Mary;i++) {
	  SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            /* a subordinate counts as done only if it is ready AND has started this job */
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks
               upon return, has the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
	}
        /* your subordinates are now ready */
      }
      /* re-acquire the own mutex; it is released again by the wait at the top of the loop */
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "PetscThreadInitialize_Tree"
939 void* PetscThreadInitialize_Tree(PetscInt N) {
940   PetscInt i,ierr;
941   int status;
942 
943   if(PetscUseThreadPool) {
944     size_t Val1 = (size_t)CACHE_LINE_SIZE;
945     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
946     arrmutex = (char*)memalign(Val1,Val2);
947     arrcond1 = (char*)memalign(Val1,Val2);
948     arrcond2 = (char*)memalign(Val1,Val2);
949     arrstart = (char*)memalign(Val1,Val2);
950     arrready = (char*)memalign(Val1,Val2);
951     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
952     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
953     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
954     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
955     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
956     /* initialize job structure */
957     for(i=0; i<PetscMaxThreads; i++) {
958       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
959       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
960       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
961       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
962       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
963     }
964     for(i=0; i<PetscMaxThreads; i++) {
965       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
966       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
967       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
968       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
969       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
970     }
971     job_tree.pfunc = NULL;
972     job_tree.pdata = (void**)malloc(N*sizeof(void*));
973     job_tree.startJob = PETSC_FALSE;
974     job_tree.eJobStat = JobInitiated;
975     pVal = (int*)malloc(N*sizeof(int));
976     /* allocate memory in the heap for the thread structure */
977     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
978     /* create threads */
979     for(i=0; i<N; i++) {
980       pVal[i] = i;
981       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
982       /* should check status */
983     }
984   }
985   return NULL;
986 }
987 
988 #undef __FUNCT__
989 #define __FUNCT__ "PetscThreadFinalize_Tree"
990 PetscErrorCode PetscThreadFinalize_Tree() {
991   int i,ierr;
992   void* jstatus;
993 
994   PetscFunctionBegin;
995 
996   if(PetscUseThreadPool) {
997     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
998     /* join the threads */
999     for(i=0; i<PetscMaxThreads; i++) {
1000       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1001       /* do error checking*/
1002     }
1003     free(PetscThreadPoint);
1004     free(arrmutex);
1005     free(arrcond1);
1006     free(arrcond2);
1007     free(arrstart);
1008     free(arrready);
1009     free(job_tree.pdata);
1010     free(pVal);
1011   }
1012   else {
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MainWait_Tree"
1019 void MainWait_Tree() {
1020   int ierr;
1021   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1022   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1023     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1024   }
1025   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MainJob_Tree"
1030 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1031   int i,ierr;
1032   PetscErrorCode ijoberr = 0;
1033 
1034   MainWait();
1035   job_tree.pfunc = pFunc;
1036   job_tree.pdata = data;
1037   job_tree.startJob = PETSC_TRUE;
1038   for(i=0; i<PetscMaxThreads; i++) {
1039     *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1040   }
1041   job_tree.eJobStat = JobInitiated;
1042   ierr = pthread_cond_signal(job_tree.cond2array[0]);
1043   if(pFunc!=FuncFinish) {
1044     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1045   }
1046 
1047   if(ithreaderr) {
1048     ijoberr = ithreaderr;
1049   }
1050   return ijoberr;
1051 }
1052 /****  ****/
1053 
1054 /**** 'Main' Thread Pool Functions ****/
/*
   PetscThreadFunc_Main - Worker loop executed by each thread of the 'main' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this thread's id (ThreadId)

   Notes:
   Every worker talks directly to the main thread: it sets its own ready flag and signals
   cond1array[ThreadId], then sleeps on cond2array[ThreadId] for as long as the ready
   flag stays PETSC_TRUE.  This routine never clears the flag itself; within this file it
   is cleared by MainJob_Main() before that routine signals cond2array.

   Always returns NULL.  A nonzero result from job_main.pfunc is recorded by setting the
   global ithreaderr to 1.  Return codes of the pthread calls are stored in ierr but never
   examined.
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;
  /* printf("Thread %d In Main Thread Function\n",ThreadId); */
  /* request affinity to the single core listed for this thread */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* the own mutex stays locked from here until the pthread_cond_wait() below releases it */
  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* the while loop needs to have an exit
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* do the job: with no data array the function receives NULL,
       otherwise it receives this thread's entry of the array */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      /* the mutex taken here is released by the wait at the top of the loop */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "PetscThreadInitialize_Main"
1109 void* PetscThreadInitialize_Main(PetscInt N) {
1110   PetscInt i,ierr;
1111   int status;
1112 
1113   if(PetscUseThreadPool) {
1114     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1115     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1116     arrmutex = (char*)memalign(Val1,Val2);
1117     arrcond1 = (char*)memalign(Val1,Val2);
1118     arrcond2 = (char*)memalign(Val1,Val2);
1119     arrstart = (char*)memalign(Val1,Val2);
1120     arrready = (char*)memalign(Val1,Val2);
1121     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1122     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1123     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1124     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1125     /* initialize job structure */
1126     for(i=0; i<PetscMaxThreads; i++) {
1127       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1128       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1129       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1130       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1131     }
1132     for(i=0; i<PetscMaxThreads; i++) {
1133       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1134       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1135       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1136       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1137     }
1138     job_main.pfunc = NULL;
1139     job_main.pdata = (void**)malloc(N*sizeof(void*));
1140     pVal = (int*)malloc(N*sizeof(int));
1141     /* allocate memory in the heap for the thread structure */
1142     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1143     /* create threads */
1144     for(i=0; i<N; i++) {
1145       pVal[i] = i;
1146       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1147       /* error check */
1148     }
1149   }
1150   else {
1151   }
1152   return NULL;
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "PetscThreadFinalize_Main"
1157 PetscErrorCode PetscThreadFinalize_Main() {
1158   int i,ierr;
1159   void* jstatus;
1160 
1161   PetscFunctionBegin;
1162 
1163   if(PetscUseThreadPool) {
1164     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1165     /* join the threads */
1166     for(i=0; i<PetscMaxThreads; i++) {
1167       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1168     }
1169     free(PetscThreadPoint);
1170     free(arrmutex);
1171     free(arrcond1);
1172     free(arrcond2);
1173     free(arrstart);
1174     free(arrready);
1175     free(job_main.pdata);
1176     free(pVal);
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MainWait_Main"
1183 void MainWait_Main() {
1184   int i,ierr;
1185   for(i=0; i<PetscMaxThreads; i++) {
1186     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1187     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1188       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1189     }
1190     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1191   }
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MainJob_Main"
1196 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1197   int i,ierr;
1198   PetscErrorCode ijoberr = 0;
1199 
1200   MainWait(); /* you know everyone is waiting to be signalled! */
1201   job_main.pfunc = pFunc;
1202   job_main.pdata = data;
1203   for(i=0; i<PetscMaxThreads; i++) {
1204     *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1205   }
1206   /* tell the threads to go to work */
1207   for(i=0; i<PetscMaxThreads; i++) {
1208     ierr = pthread_cond_signal(job_main.cond2array[i]);
1209   }
1210   if(pFunc!=FuncFinish) {
1211     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1212   }
1213 
1214   if(ithreaderr) {
1215     ijoberr = ithreaderr;
1216   }
1217   return ijoberr;
1218 }
1219 /****  ****/
1220 
1221 /**** Chain Thread Functions ****/
/*
   PetscThreadFunc_Chain - Worker loop executed by each thread of the 'chain' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this thread's id (ThreadId)

   Notes:
   The workers form a chain: the single subordinate of thread ThreadId is ThreadId+1, and
   the thread with id PetscMaxThreads-1 has none.  A thread reports itself ready only
   after its subordinate has done so; thread 0 additionally sets job_chain.eJobStat to
   JobCompleted and signals main_cond.  Each pass of the outer loop sleeps on
   cond2array[ThreadId], signals the subordinate, runs job_chain.pfunc and reports ready
   again.

   Always returns NULL.  A nonzero result from job_chain.pfunc is recorded by setting the
   global ithreaderr to 1.  Return codes of the pthread calls are stored in ierr but never
   examined.
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;
  cpu_set_t mset;
  /* printf("Thread %d In Chain Thread Function\n",ThreadId); */
  /* request affinity to the single core listed for this thread */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* PeeOn marks the last thread of the chain, which has no subordinate */
  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready*/
  }
  /* the own mutex stays locked from here until the pthread_cond_wait() below releases it */
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /*  the while loop needs to have an exit
     the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    /* NOTE(review): the ready flag is cleared right after pthread_cond_wait() returns, so
       any return from the wait - including a spurious one - ends this loop; confirm that
       this is intended */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job: with no data array the function receives NULL,
       otherwise it receives this thread's entry of the array */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinate, wait for him to be ready
         how do you know for a fact that your subordinate has actually started? */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        /* the subordinate counts as done only if it is ready AND has started this job */
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks
           upon return, has the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      /* re-acquire the own mutex; it is released again by the wait at the top of the loop */
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "PetscThreadInitialize_Chain"
1330 void* PetscThreadInitialize_Chain(PetscInt N) {
1331   PetscInt i,ierr;
1332   int status;
1333 
1334   if(PetscUseThreadPool) {
1335     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1336     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1337     arrmutex = (char*)memalign(Val1,Val2);
1338     arrcond1 = (char*)memalign(Val1,Val2);
1339     arrcond2 = (char*)memalign(Val1,Val2);
1340     arrstart = (char*)memalign(Val1,Val2);
1341     arrready = (char*)memalign(Val1,Val2);
1342     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1343     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1344     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1345     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1346     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1347     /* initialize job structure */
1348     for(i=0; i<PetscMaxThreads; i++) {
1349       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1350       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1351       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1352       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1353       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1354     }
1355     for(i=0; i<PetscMaxThreads; i++) {
1356       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1357       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1358       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1359       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1360       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1361     }
1362     job_chain.pfunc = NULL;
1363     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1364     job_chain.startJob = PETSC_FALSE;
1365     job_chain.eJobStat = JobInitiated;
1366     pVal = (int*)malloc(N*sizeof(int));
1367     /* allocate memory in the heap for the thread structure */
1368     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1369     /* create threads */
1370     for(i=0; i<N; i++) {
1371       pVal[i] = i;
1372       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1373       /* should check error */
1374     }
1375   }
1376   else {
1377   }
1378   return NULL;
1379 }
1380 
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "PetscThreadFinalize_Chain"
1384 PetscErrorCode PetscThreadFinalize_Chain() {
1385   int i,ierr;
1386   void* jstatus;
1387 
1388   PetscFunctionBegin;
1389 
1390   if(PetscUseThreadPool) {
1391     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1392     /* join the threads */
1393     for(i=0; i<PetscMaxThreads; i++) {
1394       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1395       /* should check error */
1396     }
1397     free(PetscThreadPoint);
1398     free(arrmutex);
1399     free(arrcond1);
1400     free(arrcond2);
1401     free(arrstart);
1402     free(arrready);
1403     free(job_chain.pdata);
1404     free(pVal);
1405   }
1406   else {
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MainWait_Chain"
1413 void MainWait_Chain() {
1414   int ierr;
1415   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1416   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1417     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1418   }
1419   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MainJob_Chain"
1424 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1425   int i,ierr;
1426   PetscErrorCode ijoberr = 0;
1427 
1428   MainWait();
1429   job_chain.pfunc = pFunc;
1430   job_chain.pdata = data;
1431   job_chain.startJob = PETSC_TRUE;
1432   for(i=0; i<PetscMaxThreads; i++) {
1433     *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1434   }
1435   job_chain.eJobStat = JobInitiated;
1436   ierr = pthread_cond_signal(job_chain.cond2array[0]);
1437   if(pFunc!=FuncFinish) {
1438     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1439   }
1440 
1441   if(ithreaderr) {
1442     ijoberr = ithreaderr;
1443   }
1444   return ijoberr;
1445 }
1446 /****  ****/
1447 
1448 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1449 /**** True Thread Functions ****/
/*
   PetscThreadFunc_True - Worker loop executed by each thread of the 'true' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this thread's id (used here only to look up the core affinity)

   Notes:
   All workers share one mutex (job_true.mutex) and one condition variable (job_true.cond).
   A worker counts itself in job_true.iNumReadyThreads and signals main_cond when that
   count reaches PetscMaxThreads.  When it takes a job it decrements iNumJobThreads and
   iNumReadyThreads and derives iVal, its index into job_true.pdata, from the remaining
   ready count.  After running job_true.pfunc it waits on the barrier job_true.pbarr
   before counting itself ready again.

   Always returns NULL.  A nonzero result from job_true.pfunc is recorded by setting the
   global ithreaderr to 1.  Return codes of the pthread calls are either stored in ierr
   and never examined, or not stored at all.
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;
  /* printf("Thread %d In True Pool Thread Function\n",ThreadId); */
  /* request affinity to the single core listed for this thread */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* the shared mutex stays locked from here until the pthread_cond_wait() below releases it */
  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    ierr = pthread_cond_signal(&main_cond);
  }
  /* the while loop needs to have an exit
    the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to wait if another thread sneaks in and messes with the predicate */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    /* claim one slot of the job while still holding the mutex */
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* index of this worker's argument within job_true.pdata */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* with no data array the function receives NULL, otherwise entry iVal of the array */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1;
    }
    /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
      what happens if a thread finishes before they all start? BAD!
     what happens if a thread finishes before any else start? BAD! */
    pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
    /* reset job */
    if(PetscThreadGo) {
      /* the mutex taken here is released by the wait at the top of the loop */
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
	/* signal the 'main' thread that the job is done! (only done once) */
	ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1509 
1510 #undef __FUNCT__
1511 #define __FUNCT__ "PetscThreadInitialize_True"
1512 void* PetscThreadInitialize_True(PetscInt N) {
1513   PetscInt i;
1514   int status;
1515 
1516   if(PetscUseThreadPool) {
1517     pVal = (int*)malloc(N*sizeof(int));
1518     /* allocate memory in the heap for the thread structure */
1519     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1520     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1521     job_true.pdata = (void**)malloc(N*sizeof(void*));
1522     for(i=0; i<N; i++) {
1523       pVal[i] = i;
1524       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1525       /* error check to ensure proper thread creation */
1526       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1527       /* should check error */
1528     }
1529   }
1530   else {
1531   }
1532   return NULL;
1533 }
1534 
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "PetscThreadFinalize_True"
1538 PetscErrorCode PetscThreadFinalize_True() {
1539   int i,ierr;
1540   void* jstatus;
1541 
1542   PetscFunctionBegin;
1543 
1544   if(PetscUseThreadPool) {
1545     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1546     /* join the threads */
1547     for(i=0; i<PetscMaxThreads; i++) {
1548       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1549       /* should check error */
1550     }
1551     free(BarrPoint);
1552     free(PetscThreadPoint);
1553   }
1554   else {
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MainWait_True"
1561 void MainWait_True() {
1562   int ierr;
1563   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1564     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1565   }
1566   ierr = pthread_mutex_unlock(&job_true.mutex);
1567 }
1568 
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MainJob_True"
1571 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1572   int ierr;
1573   PetscErrorCode ijoberr = 0;
1574 
1575   MainWait();
1576   job_true.pfunc = pFunc;
1577   job_true.pdata = data;
1578   job_true.pbarr = &BarrPoint[n];
1579   job_true.iNumJobThreads = n;
1580   job_true.startJob = PETSC_TRUE;
1581   ierr = pthread_cond_broadcast(&job_true.cond);
1582   if(pFunc!=FuncFinish) {
1583     MainWait(); /* why wait after? guarantees that job gets done */
1584   }
1585 
1586   if(ithreaderr) {
1587     ijoberr = ithreaderr;
1588   }
1589   return ijoberr;
1590 }
1591 /**** NO THREAD POOL FUNCTION ****/
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MainJob_Spawn"
1594 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1595   PetscErrorCode ijoberr = 0;
1596 
1597   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1598   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1599   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1600   free(apThread);
1601 
1602   return ijoberr;
1603 }
1604 /****  ****/
1605 #endif
1606 
1607 void* FuncFinish(void* arg) {
1608   PetscThreadGo = PETSC_FALSE;
1609   return(0);
1610 }
1611 
1612 #endif
1613