xref: /petsc/src/sys/objects/init.c (revision 0c53350f5bdd0b2c6514dccc899c865c02c9e7fe)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 //#if defined(PETSC_HAVE_SCHED_H) && defined(PETSC_USE_PTHREAD)
9 #ifndef _GNU_SOURCE
10 #define _GNU_SOURCE
11 #endif
12 #include <sched.h>
13 //#endif
14 #include <petscsys.h>        /*I  "petscsys.h"   I*/
15 #if defined(PETSC_USE_PTHREAD)
16 #include <pthread.h>
17 #endif
18 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
19 #include <sys/sysinfo.h>
20 #endif
21 #include <unistd.h>
22 #if defined(PETSC_HAVE_STDLIB_H)
23 #include <stdlib.h>
24 #endif
25 #if defined(PETSC_HAVE_MALLOC_H)
26 #include <malloc.h>
27 #endif
28 #if defined(PETSC_HAVE_VALGRIND)
29 #include <valgrind/valgrind.h>
30 #endif
31 
32 /* ------------------------Nasty global variables -------------------------------*/
33 /*
34      Indicates if PETSc started up MPI, or it was
35    already started before PETSc was initialized.
36 */
37 PetscBool    PetscBeganMPI         = PETSC_FALSE;
38 PetscBool    PetscInitializeCalled = PETSC_FALSE;
39 PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
40 PetscBool    PetscUseThreadPool    = PETSC_FALSE;
41 PetscBool    PetscThreadGo         = PETSC_TRUE;
42 PetscMPIInt  PetscGlobalRank = -1;
43 PetscMPIInt  PetscGlobalSize = -1;
44 
45 #if defined(PETSC_USE_PTHREAD_CLASSES)
46 PetscMPIInt  PetscMaxThreads = 2;
47 pthread_t*   PetscThreadPoint;
48 #define PETSC_HAVE_PTHREAD_BARRIER
49 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
50 pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
51 #endif
52 PetscErrorCode ithreaderr = 0;
53 int*         pVal;
54 
55 #define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
56 int* ThreadCoreAffinity;
57 
58 typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */
59 
60 typedef struct {
61   pthread_mutex_t** mutexarray;
62   pthread_cond_t**  cond1array;
63   pthread_cond_t** cond2array;
64   void* (*pfunc)(void*);
65   void** pdata;
66   PetscBool startJob;
67   estat eJobStat;
68   PetscBool** arrThreadStarted;
69   PetscBool** arrThreadReady;
70 } sjob_tree;
71 sjob_tree job_tree;
72 typedef struct {
73   pthread_mutex_t** mutexarray;
74   pthread_cond_t**  cond1array;
75   pthread_cond_t** cond2array;
76   void* (*pfunc)(void*);
77   void** pdata;
78   PetscBool** arrThreadReady;
79 } sjob_main;
80 sjob_main job_main;
81 typedef struct {
82   pthread_mutex_t** mutexarray;
83   pthread_cond_t**  cond1array;
84   pthread_cond_t** cond2array;
85   void* (*pfunc)(void*);
86   void** pdata;
87   PetscBool startJob;
88   estat eJobStat;
89   PetscBool** arrThreadStarted;
90   PetscBool** arrThreadReady;
91 } sjob_chain;
92 sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/*
   sjob_true - job record type behind the global job_true; only compiled when
   pthread barriers are available. Unlike the other job records it holds a single
   mutex/condition pair by value and a barrier pointer.
*/
typedef struct {
  pthread_mutex_t   mutex;
  pthread_cond_t    cond;
  void              *(*pfunc)(void*);  /* routine with the pthread start-routine signature */
  void              **pdata;           /* array of void* arguments */
  pthread_barrier_t *pbarr;
  int               iNumJobThreads;
  int               iNumReadyThreads;
  PetscBool         startJob;
} sjob_true;

/* Positional initializer: the entries follow the member order of sjob_true exactly */
sjob_true job_true = {
  PTHREAD_MUTEX_INITIALIZER,
  PTHREAD_COND_INITIALIZER,
  NULL,
  NULL,
  NULL,
  0,
  0,
  PETSC_FALSE
};
#endif
106 
/* Statically initialized condition variable; used by 'true', 'chain','tree' thread pools */
pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER;

/* Raw byte buffers, all used by 'chain','main','tree' thread pools; zero-initialized (NULL) at start */
char *arrmutex;
char *arrcond1;
char *arrcond2;
char *arrstart;
char *arrready;
113 
114 /* Function Pointers */
115 void*          (*PetscThreadFunc)(void*) = NULL;
116 void*          (*PetscThreadInitialize)(PetscInt) = NULL;
117 PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
118 void           (*MainWait)(void) = NULL;
119 PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
120 /**** Tree Thread Pool Functions ****/
121 void*          PetscThreadFunc_Tree(void*);
122 void*          PetscThreadInitialize_Tree(PetscInt);
123 PetscErrorCode PetscThreadFinalize_Tree(void);
124 void           MainWait_Tree(void);
125 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
126 /**** Main Thread Pool Functions ****/
127 void*          PetscThreadFunc_Main(void*);
128 void*          PetscThreadInitialize_Main(PetscInt);
129 PetscErrorCode PetscThreadFinalize_Main(void);
130 void           MainWait_Main(void);
131 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
132 /**** Chain Thread Pool Functions ****/
133 void*          PetscThreadFunc_Chain(void*);
134 void*          PetscThreadInitialize_Chain(PetscInt);
135 PetscErrorCode PetscThreadFinalize_Chain(void);
136 void           MainWait_Chain(void);
137 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
138 /**** True Thread Pool Functions ****/
139 void*          PetscThreadFunc_True(void*);
140 void*          PetscThreadInitialize_True(PetscInt);
141 PetscErrorCode PetscThreadFinalize_True(void);
142 void           MainWait_True(void);
143 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
144 /**** NO Thread Pool Function  ****/
145 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);
146 /****  ****/
147 void* FuncFinish(void*);
148 void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
149 void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
150 #endif
151 
152 #if defined(PETSC_USE_COMPLEX)
153 #if defined(PETSC_COMPLEX_INSTANTIATE)
154 template <> class std::complex<double>; /* instantiate complex template class */
155 #endif
156 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
157 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
158 MPI_Datatype   MPI_C_COMPLEX;
159 #endif
160 PetscScalar    PETSC_i;
161 #else
162 PetscScalar    PETSC_i = 0.0;
163 #endif
164 #if defined(PETSC_USE_REAL___FLOAT128)
165 MPI_Datatype   MPIU___FLOAT128 = 0;
166 #endif
167 MPI_Datatype   MPIU_2SCALAR = 0;
168 MPI_Datatype   MPIU_2INT = 0;
169 
170 /*
171      These are needed by petscbt.h
172 */
173 #include <petscbt.h>
174 char      _BT_mask = ' ';
175 char      _BT_c = ' ';
176 PetscInt  _BT_idx  = 0;
177 
178 /*
179        Function that is called to display all error messages
180 */
181 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
182 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
183 #if defined(PETSC_HAVE_MATLAB_ENGINE)
184 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
185 #else
186 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
187 #endif
188 /*
189   This is needed to turn on/off cusp synchronization */
190 PetscBool   synchronizeCUSP = PETSC_FALSE;
191 
192 /* ------------------------------------------------------------------------------*/
193 /*
194    Optional file where all PETSc output from various prints is saved
195 */
196 FILE *petsc_history = PETSC_NULL;
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "PetscOpenHistoryFile"
200 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
201 {
202   PetscErrorCode ierr;
203   PetscMPIInt    rank,size;
204   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
205   char           version[256];
206 
207   PetscFunctionBegin;
208   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
209   if (!rank) {
210     char        arch[10];
211     int         err;
212     PetscViewer viewer;
213 
214     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
215     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
216     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
217     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
218     if (filename) {
219       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
220     } else {
221       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
222       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
223       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
224     }
225 
226     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
227     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
228     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
229     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
230     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
231     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
232     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
233     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
234     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
235     err = fflush(*fd);
236     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
237   }
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "PetscCloseHistoryFile"
243 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
244 {
245   PetscErrorCode ierr;
246   PetscMPIInt    rank;
247   char           date[64];
248   int            err;
249 
250   PetscFunctionBegin;
251   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
252   if (!rank) {
253     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
254     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
255     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
256     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
257     err = fflush(*fd);
258     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
259     err = fclose(*fd);
260     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 /* ------------------------------------------------------------------------------*/
266 
/*
   This is ugly and probably belongs somewhere else, but I want to
  be able to install a true MPI abort error handler from the command line arguments.

    This is so that MPI errors hit in the debugger leave all the stack
  frames intact. The default MPI_Abort() cleans up and exits, thus providing no useful
  information in the debugger; hence we call abort() instead of MPI_Abort().
*/
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "Petsc_MPI_AbortOnError"
278 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
279 {
280   PetscFunctionBegin;
281   (*PetscErrorPrintf)("MPI error %d\n",*flag);
282   abort();
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
287 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
288 {
289   PetscErrorCode ierr;
290 
291   PetscFunctionBegin;
292   (*PetscErrorPrintf)("MPI error %d\n",*flag);
293   ierr = PetscAttachDebugger();
294   if (ierr) { /* hopeless so get out */
295     MPI_Abort(*comm,*flag);
296   }
297 }
298 
299 #undef __FUNCT__
300 #define __FUNCT__ "PetscEnd"
301 /*@C
302    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
303      wishes a clean exit somewhere deep in the program.
304 
305    Collective on PETSC_COMM_WORLD
306 
307    Options Database Keys are the same as for PetscFinalize()
308 
309    Level: advanced
310 
311    Note:
312    See PetscInitialize() for more general runtime options.
313 
314 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
315 @*/
316 PetscErrorCode  PetscEnd(void)
317 {
318   PetscFunctionBegin;
319   PetscFinalize();
320   exit(0);
321   return 0;
322 }
323 
324 PetscBool    PetscOptionsPublish = PETSC_FALSE;
325 extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
326 extern PetscBool  petscsetmallocvisited;
327 static char       emacsmachinename[256];
328 
329 PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
330 PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
331 
332 #undef __FUNCT__
333 #define __FUNCT__ "PetscSetHelpVersionFunctions"
334 /*@C
335    PetscSetHelpVersionFunctions - Sets functions that print help and version information
336    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
337    This routine enables a "higher-level" package that uses PETSc to print its messages first.
338 
339    Input Parameter:
340 +  help - the help function (may be PETSC_NULL)
341 -  version - the version function (may be PETSC_NULL)
342 
343    Level: developer
344 
345    Concepts: package help message
346 
347 @*/
348 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
349 {
350   PetscFunctionBegin;
351   PetscExternalHelpFunction    = help;
352   PetscExternalVersionFunction = version;
353   PetscFunctionReturn(0);
354 }
355 
356 #undef __FUNCT__
357 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
358 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
359 {
360   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
361   MPI_Comm       comm = PETSC_COMM_WORLD;
362   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
363   PetscErrorCode ierr;
364   PetscReal      si;
365   int            i;
366   PetscMPIInt    rank;
367   char           version[256];
368 
369   PetscFunctionBegin;
370   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
371 
372   /*
373       Setup the memory management; support for tracing malloc() usage
374   */
375   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
376 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
377   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
378   if ((!flg2 || flg1) && !petscsetmallocvisited) {
379 #if defined(PETSC_HAVE_VALGRIND)
380     if (flg2 || !(RUNNING_ON_VALGRIND)) {
381       /* turn off default -malloc if valgrind is being used */
382 #endif
383       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
384 #if defined(PETSC_HAVE_VALGRIND)
385     }
386 #endif
387   }
388 #else
389   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
390   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
391   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
392 #endif
393   if (flg3) {
394     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
395   }
396   flg1 = PETSC_FALSE;
397   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
398   if (flg1) {
399     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
400     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
401   }
402 
403   flg1 = PETSC_FALSE;
404   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
405   if (!flg1) {
406     flg1 = PETSC_FALSE;
407     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
408   }
409   if (flg1) {
410     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
411   }
412 
413   /*
414       Set the display variable for graphics
415   */
416   ierr = PetscSetDisplay();CHKERRQ(ierr);
417 
418 
419   /*
420       Print the PETSc version information
421   */
422   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
423   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
424   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
425   if (flg1 || flg2 || flg3){
426 
427     /*
428        Print "higher-level" package version message
429     */
430     if (PetscExternalVersionFunction) {
431       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
432     }
433 
434     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
435     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
436 ------------------------------\n");CHKERRQ(ierr);
437     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
441     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
442     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
443     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
444 ------------------------------\n");CHKERRQ(ierr);
445   }
446 
447   /*
448        Print "higher-level" package help message
449   */
450   if (flg3){
451     if (PetscExternalHelpFunction) {
452       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
453     }
454   }
455 
456   /*
457       Setup the error handling
458   */
459   flg1 = PETSC_FALSE;
460   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
461   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
462   flg1 = PETSC_FALSE;
463   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
464   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
465   flg1 = PETSC_FALSE;
466   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
467   if (flg1) {
468     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
469   }
470   flg1 = PETSC_FALSE;
471   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
472   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
473   flg1 = PETSC_FALSE;
474   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
475   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
476 
477   /*
478       Setup debugger information
479   */
480   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
481   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
482   if (flg1) {
483     MPI_Errhandler err_handler;
484 
485     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
486     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
487     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
488     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
489   }
490   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
491   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
492   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
493   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
494   if (flg1 || flg2) {
495     PetscMPIInt    size;
496     PetscInt       lsize,*nodes;
497     MPI_Errhandler err_handler;
498     /*
499        we have to make sure that all processors have opened
500        connections to all other processors, otherwise once the
501        debugger has stated it is likely to receive a SIGUSR1
502        and kill the program.
503     */
504     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
505     if (size > 2) {
506       PetscMPIInt dummy = 0;
507       MPI_Status  status;
508       for (i=0; i<size; i++) {
509         if (rank != i) {
510           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
511         }
512       }
513       for (i=0; i<size; i++) {
514         if (rank != i) {
515           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
516         }
517       }
518     }
519     /* check if this processor node should be in debugger */
520     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
521     lsize = size;
522     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
523     if (flag) {
524       for (i=0; i<lsize; i++) {
525         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
526       }
527     }
528     if (!flag) {
529       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
530       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
531       if (flg1) {
532         ierr = PetscAttachDebugger();CHKERRQ(ierr);
533       } else {
534         ierr = PetscStopForDebugger();CHKERRQ(ierr);
535       }
536       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
537       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
538     }
539     ierr = PetscFree(nodes);CHKERRQ(ierr);
540   }
541 
542   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
543   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
544 
545 #if defined(PETSC_USE_SOCKET_VIEWER)
546   /*
547     Activates new sockets for zope if needed
548   */
549   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
550   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
551   if (flgz){
552     int  sockfd;
553     char hostname[256];
554     char username[256];
555     int  remoteport = 9999;
556 
557     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
558     if (!hostname[0]){
559       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
560     }
561     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
562     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
563     PETSC_ZOPEFD = fdopen(sockfd, "w");
564     if (flgzout){
565       PETSC_STDOUT = PETSC_ZOPEFD;
566       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
567       fprintf(PETSC_STDOUT, "<<<start>>>");
568     } else {
569       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
570       fprintf(PETSC_ZOPEFD, "<<<start>>>");
571     }
572   }
573 #endif
574 #if defined(PETSC_USE_SERVER)
575   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
576   if (flgz){
577     PetscInt port = PETSC_DECIDE;
578     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
579     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
580   }
581 #endif
582 
583   /*
584         Setup profiling and logging
585   */
586 #if defined (PETSC_USE_INFO)
587   {
588     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
589     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
590     if (flg1 && logname[0]) {
591       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
592     } else if (flg1) {
593       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
594     }
595   }
596 #endif
597 #if defined(PETSC_USE_LOG)
598   mname[0] = 0;
599   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
600   if (flg1) {
601     if (mname[0]) {
602       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
603     } else {
604       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
605     }
606   }
607 #if defined(PETSC_HAVE_MPE)
608   flg1 = PETSC_FALSE;
609   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
610   if (flg1) PetscLogMPEBegin();
611 #endif
612   flg1 = PETSC_FALSE;
613   flg2 = PETSC_FALSE;
614   flg3 = PETSC_FALSE;
615   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
616   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
617   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
618   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
619   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
620   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
621 
622   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
623   if (flg1) {
624     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
625     FILE *file;
626     if (mname[0]) {
627       sprintf(name,"%s.%d",mname,rank);
628       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
629       file = fopen(fname,"w");
630       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
631     } else {
632       file = PETSC_STDOUT;
633     }
634     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
635   }
636 #endif
637 
638   /*
639       Setup building of stack frames for all function calls
640   */
641 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
642   ierr = PetscStackCreate();CHKERRQ(ierr);
643 #endif
644 
645   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
646 
647 #if defined(PETSC_USE_PTHREAD_CLASSES)
648   /*
649       Determine whether user specified maximum number of threads
650    */
651   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
652 
653   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
654   if(flg1) {
655     cpu_set_t mset;
656     int icorr,ncorr = get_nprocs();
657     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
658     CPU_ZERO(&mset);
659     CPU_SET(icorr%ncorr,&mset);
660     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
661   }
662 
663   /*
664       Determine whether to use thread pool
665    */
666   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
667   if (flg1) {
668     PetscUseThreadPool = PETSC_TRUE;
669     PetscInt N_CORES = get_nprocs();
670     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
671     char tstr[9];
672     char tbuf[2];
673     strcpy(tstr,"-thread");
674     for(i=0;i<PetscMaxThreads;i++) {
675       ThreadCoreAffinity[i] = i;
676       sprintf(tbuf,"%d",i);
677       strcat(tstr,tbuf);
678       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
679       if(flg1) {
680         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
681         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
682       }
683       tstr[7] = '\0';
684     }
685     /* get the thread pool type */
686     PetscInt ipool = 0;
687     const char *choices[4] = {"true","tree","main","chain"};
688 
689     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
690     switch(ipool) {
691     case 1:
692       PetscThreadFunc       = &PetscThreadFunc_Tree;
693       PetscThreadInitialize = &PetscThreadInitialize_Tree;
694       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
695       MainWait              = &MainWait_Tree;
696       MainJob               = &MainJob_Tree;
697       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
698       break;
699     case 2:
700       PetscThreadFunc       = &PetscThreadFunc_Main;
701       PetscThreadInitialize = &PetscThreadInitialize_Main;
702       PetscThreadFinalize   = &PetscThreadFinalize_Main;
703       MainWait              = &MainWait_Main;
704       MainJob               = &MainJob_Main;
705       PetscInfo(PETSC_NULL,"Using main thread pool\n");
706       break;
707 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
708     case 3:
709 #else
710     default:
711 #endif
712       PetscThreadFunc       = &PetscThreadFunc_Chain;
713       PetscThreadInitialize = &PetscThreadInitialize_Chain;
714       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
715       MainWait              = &MainWait_Chain;
716       MainJob               = &MainJob_Chain;
717       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
718       break;
719 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
720     default:
721       PetscThreadFunc       = &PetscThreadFunc_True;
722       PetscThreadInitialize = &PetscThreadInitialize_True;
723       PetscThreadFinalize   = &PetscThreadFinalize_True;
724       MainWait              = &MainWait_True;
725       MainJob               = &MainJob_True;
726       PetscInfo(PETSC_NULL,"Using true thread pool\n");
727       break;
728 #endif
729     }
730     PetscThreadInitialize(PetscMaxThreads);
731   } else {
732     //need to define these in the case on 'no threads' or 'thread create/destroy'
733     //could take any of the above versions
734     MainJob               = &MainJob_Spawn;
735   }
736 #endif
737   /*
738        Print basic help message
739   */
740   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
741   if (flg1) {
742     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
776 #if defined(PETSC_USE_LOG)
777     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
778     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
779     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
780 #if defined(PETSC_HAVE_MPE)
781     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
782 #endif
783     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
784 #endif
785     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
786     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
787     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
788     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
789   }
790 
791   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
792   if (flg1) {
793     ierr = PetscSleep(si);CHKERRQ(ierr);
794   }
795 
796   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
797   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
798   if (f) {
799     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
800   }
801 
802 #if defined(PETSC_HAVE_CUSP)
803   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
804   if (flg3) flg1 = PETSC_TRUE;
805   else flg1 = PETSC_FALSE;
806   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
807   if (flg1) synchronizeCUSP = PETSC_TRUE;
808 #endif
809 
810   PetscFunctionReturn(0);
811 }
812 
813 #if defined(PETSC_USE_PTHREAD_CLASSES)
814 
815 /**** 'Tree' Thread Pool Functions ****/
816 void* PetscThreadFunc_Tree(void* arg) {
817   PetscErrorCode iterr;
818   int icorr,ierr;
819   int* pId = (int*)arg;
820   int ThreadId = *pId,Mary = 2,i,SubWorker;
821   PetscBool PeeOn;
822   cpu_set_t mset;
823   //printf("Thread %d In Tree Thread Function\n",ThreadId);
824   icorr = ThreadCoreAffinity[ThreadId];
825   CPU_ZERO(&mset);
826   CPU_SET(icorr,&mset);
827   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
828 
829   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
830     PeeOn = PETSC_TRUE;
831   }
832   else {
833     PeeOn = PETSC_FALSE;
834   }
835   if(PeeOn==PETSC_FALSE) {
836     /* check your subordinates, wait for them to be ready */
837     for(i=1;i<=Mary;i++) {
838       SubWorker = Mary*ThreadId+i;
839       if(SubWorker<PetscMaxThreads) {
840         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
841         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
842           /* upon entry, automically releases the lock and blocks
843            upon return, has the lock */
844           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
845         }
846         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
847       }
848     }
849     /* your subordinates are now ready */
850   }
851   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
852   /* update your ready status */
853   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
854   if(ThreadId==0) {
855     job_tree.eJobStat = JobCompleted;
856     /* ignal main */
857     ierr = pthread_cond_signal(&main_cond);
858   }
859   else {
860     /* tell your boss that you're ready to work */
861     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
862   }
863   /* the while loop needs to have an exit
864   the 'main' thread can terminate all the threads by performing a broadcast
865    and calling FuncFinish */
866   while(PetscThreadGo) {
867     /*need to check the condition to ensure we don't have to wait
868       waiting when you don't have to causes problems
869      also need to check the condition to ensure proper handling of spurious wakeups */
870     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
871       /* upon entry, automically releases the lock and blocks
872        upon return, has the lock */
873         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
874 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
875 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
876     }
877     if(ThreadId==0) {
878       job_tree.startJob = PETSC_FALSE;
879       job_tree.eJobStat = ThreadsWorking;
880     }
881     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
882     if(PeeOn==PETSC_FALSE) {
883       /* tell your subordinates it's time to get to work */
884       for(i=1; i<=Mary; i++) {
885 	SubWorker = Mary*ThreadId+i;
886         if(SubWorker<PetscMaxThreads) {
887           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
888         }
889       }
890     }
891     /* do your job */
892     if(job_tree.pdata==NULL) {
893       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
894     }
895     else {
896       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
897     }
898     if(iterr!=0) {
899       ithreaderr = 1;
900     }
901     if(PetscThreadGo) {
902       /* reset job, get ready for more */
903       if(PeeOn==PETSC_FALSE) {
904         /* check your subordinates, waiting for them to be ready
905          how do you know for a fact that a given subordinate has actually started? */
906 	for(i=1;i<=Mary;i++) {
907 	  SubWorker = Mary*ThreadId+i;
908           if(SubWorker<PetscMaxThreads) {
909             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
910             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
911               /* upon entry, automically releases the lock and blocks
912                upon return, has the lock */
913               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
914             }
915             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
916           }
917 	}
918         /* your subordinates are now ready */
919       }
920       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
921       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
922       if(ThreadId==0) {
923 	job_tree.eJobStat = JobCompleted; /* oot thread: last thread to complete, guaranteed! */
924         /* root thread signals 'main' */
925         ierr = pthread_cond_signal(&main_cond);
926       }
927       else {
928         /* signal your boss before you go to sleep */
929         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
930       }
931     }
932   }
933   return NULL;
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "PetscThreadInitialize_Tree"
938 void* PetscThreadInitialize_Tree(PetscInt N) {
939   PetscInt i,ierr;
940   int status;
941 
942   if(PetscUseThreadPool) {
943     size_t Val1 = (size_t)CACHE_LINE_SIZE;
944     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
945     arrmutex = (char*)memalign(Val1,Val2);
946     arrcond1 = (char*)memalign(Val1,Val2);
947     arrcond2 = (char*)memalign(Val1,Val2);
948     arrstart = (char*)memalign(Val1,Val2);
949     arrready = (char*)memalign(Val1,Val2);
950     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
951     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
952     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
953     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
954     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
955     /* initialize job structure */
956     for(i=0; i<PetscMaxThreads; i++) {
957       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
958       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
959       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
960       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
961       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
962     }
963     for(i=0; i<PetscMaxThreads; i++) {
964       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
965       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
966       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
967       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
968       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
969     }
970     job_tree.pfunc = NULL;
971     job_tree.pdata = (void**)malloc(N*sizeof(void*));
972     job_tree.startJob = PETSC_FALSE;
973     job_tree.eJobStat = JobInitiated;
974     pVal = (int*)malloc(N*sizeof(int));
975     /* allocate memory in the heap for the thread structure */
976     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
977     /* create threads */
978     for(i=0; i<N; i++) {
979       pVal[i] = i;
980       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
981       /* should check status */
982     }
983   }
984   return NULL;
985 }
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "PetscThreadFinalize_Tree"
989 PetscErrorCode PetscThreadFinalize_Tree() {
990   int i,ierr;
991   void* jstatus;
992 
993   PetscFunctionBegin;
994 
995   if(PetscUseThreadPool) {
996     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
997     /* join the threads */
998     for(i=0; i<PetscMaxThreads; i++) {
999       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1000       /* do error checking*/
1001     }
1002     free(PetscThreadPoint);
1003     free(arrmutex);
1004     free(arrcond1);
1005     free(arrcond2);
1006     free(arrstart);
1007     free(arrready);
1008     free(job_tree.pdata);
1009     free(pVal);
1010   }
1011   else {
1012   }
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MainWait_Tree"
1018 void MainWait_Tree() {
1019   int ierr;
1020   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1021   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1022     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1023   }
1024   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MainJob_Tree"
1029 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1030   int i,ierr;
1031   PetscErrorCode ijoberr = 0;
1032 
1033   MainWait();
1034   job_tree.pfunc = pFunc;
1035   job_tree.pdata = data;
1036   job_tree.startJob = PETSC_TRUE;
1037   for(i=0; i<PetscMaxThreads; i++) {
1038     *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1039   }
1040   job_tree.eJobStat = JobInitiated;
1041   ierr = pthread_cond_signal(job_tree.cond2array[0]);
1042   if(pFunc!=FuncFinish) {
1043     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1044   }
1045 
1046   if(ithreaderr) {
1047     ijoberr = ithreaderr;
1048   }
1049   return ijoberr;
1050 }
1051 /****  ****/
1052 
1053 /**** 'Main' Thread Pool Functions ****/
1054 void* PetscThreadFunc_Main(void* arg) {
1055   PetscErrorCode iterr;
1056   int icorr,ierr;
1057   int* pId = (int*)arg;
1058   int ThreadId = *pId;
1059   cpu_set_t mset;
1060   //printf("Thread %d In Main Thread Function\n",ThreadId);
1061   icorr = ThreadCoreAffinity[ThreadId];
1062   CPU_ZERO(&mset);
1063   CPU_SET(icorr,&mset);
1064   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1065 
1066   ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1067   /* update your ready status */
1068   *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1069   /* tell the BOSS that you're ready to work before you go to sleep */
1070   ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1071 
1072   /* the while loop needs to have an exit
1073      the 'main' thread can terminate all the threads by performing a broadcast
1074      and calling FuncFinish */
1075   while(PetscThreadGo) {
1076     /* need to check the condition to ensure we don't have to wait
1077        waiting when you don't have to causes problems
1078      also need to check the condition to ensure proper handling of spurious wakeups */
1079     while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
1080       /* upon entry, atomically releases the lock and blocks
1081        upon return, has the lock */
1082         ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
1083 	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
1084     }
1085     ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
1086     if(job_main.pdata==NULL) {
1087       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
1088     }
1089     else {
1090       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
1091     }
1092     if(iterr!=0) {
1093       ithreaderr = 1;
1094     }
1095     if(PetscThreadGo) {
1096       /* reset job, get ready for more */
1097       ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1098       *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1099       /* tell the BOSS that you're ready to work before you go to sleep */
1100       ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1101     }
1102   }
1103   return NULL;
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "PetscThreadInitialize_Main"
1108 void* PetscThreadInitialize_Main(PetscInt N) {
1109   PetscInt i,ierr;
1110   int status;
1111 
1112   if(PetscUseThreadPool) {
1113     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1114     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1115     arrmutex = (char*)memalign(Val1,Val2);
1116     arrcond1 = (char*)memalign(Val1,Val2);
1117     arrcond2 = (char*)memalign(Val1,Val2);
1118     arrstart = (char*)memalign(Val1,Val2);
1119     arrready = (char*)memalign(Val1,Val2);
1120     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1121     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1122     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1123     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1124     /* initialize job structure */
1125     for(i=0; i<PetscMaxThreads; i++) {
1126       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1127       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1128       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1129       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1130     }
1131     for(i=0; i<PetscMaxThreads; i++) {
1132       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1133       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1134       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1135       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1136     }
1137     job_main.pfunc = NULL;
1138     job_main.pdata = (void**)malloc(N*sizeof(void*));
1139     pVal = (int*)malloc(N*sizeof(int));
1140     /* allocate memory in the heap for the thread structure */
1141     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1142     /* create threads */
1143     for(i=0; i<N; i++) {
1144       pVal[i] = i;
1145       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1146       /* error check */
1147     }
1148   }
1149   else {
1150   }
1151   return NULL;
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "PetscThreadFinalize_Main"
1156 PetscErrorCode PetscThreadFinalize_Main() {
1157   int i,ierr;
1158   void* jstatus;
1159 
1160   PetscFunctionBegin;
1161 
1162   if(PetscUseThreadPool) {
1163     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1164     /* join the threads */
1165     for(i=0; i<PetscMaxThreads; i++) {
1166       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1167     }
1168     free(PetscThreadPoint);
1169     free(arrmutex);
1170     free(arrcond1);
1171     free(arrcond2);
1172     free(arrstart);
1173     free(arrready);
1174     free(job_main.pdata);
1175     free(pVal);
1176   }
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MainWait_Main"
1182 void MainWait_Main() {
1183   int i,ierr;
1184   for(i=0; i<PetscMaxThreads; i++) {
1185     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1186     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1187       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1188     }
1189     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1190   }
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MainJob_Main"
1195 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1196   int i,ierr;
1197   PetscErrorCode ijoberr = 0;
1198 
1199   MainWait(); /* you know everyone is waiting to be signalled! */
1200   job_main.pfunc = pFunc;
1201   job_main.pdata = data;
1202   for(i=0; i<PetscMaxThreads; i++) {
1203     *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1204   }
1205   /* tell the threads to go to work */
1206   for(i=0; i<PetscMaxThreads; i++) {
1207     ierr = pthread_cond_signal(job_main.cond2array[i]);
1208   }
1209   if(pFunc!=FuncFinish) {
1210     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1211   }
1212 
1213   if(ithreaderr) {
1214     ijoberr = ithreaderr;
1215   }
1216   return ijoberr;
1217 }
1218 /****  ****/
1219 
1220 /**** Chain Thread Functions ****/
1221 void* PetscThreadFunc_Chain(void* arg) {
1222   PetscErrorCode iterr;
1223   int icorr,ierr;
1224   int* pId = (int*)arg;
1225   int ThreadId = *pId;
1226   int SubWorker = ThreadId + 1;
1227   PetscBool PeeOn;
1228   cpu_set_t mset;
1229   //printf("Thread %d In Chain Thread Function\n",ThreadId);
1230   icorr = ThreadCoreAffinity[ThreadId];
1231   CPU_ZERO(&mset);
1232   CPU_SET(icorr,&mset);
1233   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1234 
1235   if(ThreadId==(PetscMaxThreads-1)) {
1236     PeeOn = PETSC_TRUE;
1237   }
1238   else {
1239     PeeOn = PETSC_FALSE;
1240   }
1241   if(PeeOn==PETSC_FALSE) {
1242     /* check your subordinate, wait for him to be ready */
1243     ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1244     while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
1245       /* upon entry, automically releases the lock and blocks
1246        upon return, has the lock */
1247       ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1248     }
1249     ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1250     /* your subordinate is now ready*/
1251   }
1252   ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1253   /* update your ready status */
1254   *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1255   if(ThreadId==0) {
1256     job_chain.eJobStat = JobCompleted;
1257     /* signal main */
1258     ierr = pthread_cond_signal(&main_cond);
1259   }
1260   else {
1261     /* tell your boss that you're ready to work */
1262     ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1263   }
1264   /*  the while loop needs to have an exit
1265      the 'main' thread can terminate all the threads by performing a broadcast
1266    and calling FuncFinish */
1267   while(PetscThreadGo) {
1268     /* need to check the condition to ensure we don't have to wait
1269        waiting when you don't have to causes problems
1270      also need to check the condition to ensure proper handling of spurious wakeups */
1271     while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
1272       /*upon entry, automically releases the lock and blocks
1273        upon return, has the lock */
1274         ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
1275 	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
1276 	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
1277     }
1278     if(ThreadId==0) {
1279       job_chain.startJob = PETSC_FALSE;
1280       job_chain.eJobStat = ThreadsWorking;
1281     }
1282     ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
1283     if(PeeOn==PETSC_FALSE) {
1284       /* tell your subworker it's time to get to work */
1285       ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
1286     }
1287     /* do your job */
1288     if(job_chain.pdata==NULL) {
1289       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
1290     }
1291     else {
1292       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
1293     }
1294     if(iterr!=0) {
1295       ithreaderr = 1;
1296     }
1297     if(PetscThreadGo) {
1298       /* reset job, get ready for more */
1299       if(PeeOn==PETSC_FALSE) {
1300         /* check your subordinate, wait for him to be ready
1301          how do you know for a fact that your subordinate has actually started? */
1302         ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1303         while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
1304           /* upon entry, automically releases the lock and blocks
1305            upon return, has the lock */
1306           ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1307         }
1308         ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1309         /* your subordinate is now ready */
1310       }
1311       ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1312       *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1313       if(ThreadId==0) {
1314 	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
1315         /* root thread (foreman) signals 'main' */
1316         ierr = pthread_cond_signal(&main_cond);
1317       }
1318       else {
1319         /* signal your boss before you go to sleep */
1320         ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1321       }
1322     }
1323   }
1324   return NULL;
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "PetscThreadInitialize_Chain"
1329 void* PetscThreadInitialize_Chain(PetscInt N) {
1330   PetscInt i,ierr;
1331   int status;
1332 
1333   if(PetscUseThreadPool) {
1334     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1335     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1336     arrmutex = (char*)memalign(Val1,Val2);
1337     arrcond1 = (char*)memalign(Val1,Val2);
1338     arrcond2 = (char*)memalign(Val1,Val2);
1339     arrstart = (char*)memalign(Val1,Val2);
1340     arrready = (char*)memalign(Val1,Val2);
1341     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1342     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1343     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1344     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1345     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1346     /* initialize job structure */
1347     for(i=0; i<PetscMaxThreads; i++) {
1348       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1349       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1350       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1351       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1352       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1353     }
1354     for(i=0; i<PetscMaxThreads; i++) {
1355       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1356       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1357       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1358       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1359       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1360     }
1361     job_chain.pfunc = NULL;
1362     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1363     job_chain.startJob = PETSC_FALSE;
1364     job_chain.eJobStat = JobInitiated;
1365     pVal = (int*)malloc(N*sizeof(int));
1366     /* allocate memory in the heap for the thread structure */
1367     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1368     /* create threads */
1369     for(i=0; i<N; i++) {
1370       pVal[i] = i;
1371       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1372       /* should check error */
1373     }
1374   }
1375   else {
1376   }
1377   return NULL;
1378 }
1379 
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "PetscThreadFinalize_Chain"
1383 PetscErrorCode PetscThreadFinalize_Chain() {
1384   int i,ierr;
1385   void* jstatus;
1386 
1387   PetscFunctionBegin;
1388 
1389   if(PetscUseThreadPool) {
1390     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1391     /* join the threads */
1392     for(i=0; i<PetscMaxThreads; i++) {
1393       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1394       /* should check error */
1395     }
1396     free(PetscThreadPoint);
1397     free(arrmutex);
1398     free(arrcond1);
1399     free(arrcond2);
1400     free(arrstart);
1401     free(arrready);
1402     free(job_chain.pdata);
1403     free(pVal);
1404   }
1405   else {
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MainWait_Chain"
1412 void MainWait_Chain() {
1413   int ierr;
1414   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1415   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1416     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1417   }
1418   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MainJob_Chain"
1423 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1424   int i,ierr;
1425   PetscErrorCode ijoberr = 0;
1426 
1427   MainWait();
1428   job_chain.pfunc = pFunc;
1429   job_chain.pdata = data;
1430   job_chain.startJob = PETSC_TRUE;
1431   for(i=0; i<PetscMaxThreads; i++) {
1432     *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1433   }
1434   job_chain.eJobStat = JobInitiated;
1435   ierr = pthread_cond_signal(job_chain.cond2array[0]);
1436   if(pFunc!=FuncFinish) {
1437     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1438   }
1439 
1440   if(ithreaderr) {
1441     ijoberr = ithreaderr;
1442   }
1443   return ijoberr;
1444 }
1445 /****  ****/
1446 
1447 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1448 /**** True Thread Functions ****/
1449 void* PetscThreadFunc_True(void* arg) {
1450   int icorr,ierr,iVal;
1451   int* pId = (int*)arg;
1452   int ThreadId = *pId;
1453   PetscErrorCode iterr;
1454   cpu_set_t mset;
1455   //printf("Thread %d In True Pool Thread Function\n",ThreadId);
1456   icorr = ThreadCoreAffinity[ThreadId];
1457   CPU_ZERO(&mset);
1458   CPU_SET(icorr,&mset);
1459   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1460 
1461   ierr = pthread_mutex_lock(&job_true.mutex);
1462   job_true.iNumReadyThreads++;
1463   if(job_true.iNumReadyThreads==PetscMaxThreads) {
1464     ierr = pthread_cond_signal(&main_cond);
1465   }
1466   /*the while loop needs to have an exit
1467     the 'main' thread can terminate all the threads by performing a broadcast
1468    and calling FuncFinish */
1469   while(PetscThreadGo) {
1470     /*need to check the condition to ensure we don't have to wait
1471       waiting when you don't have to causes problems
1472      also need to wait if another thread sneaks in and messes with the predicate */
1473     while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
1474       /* upon entry, automically releases the lock and blocks
1475        upon return, has the lock */
1476       ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
1477     }
1478     job_true.startJob = PETSC_FALSE;
1479     job_true.iNumJobThreads--;
1480     job_true.iNumReadyThreads--;
1481     iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
1482     pthread_mutex_unlock(&job_true.mutex);
1483     if(job_true.pdata==NULL) {
1484       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
1485     }
1486     else {
1487       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
1488     }
1489     if(iterr!=0) {
1490       ithreaderr = 1;
1491     }
1492     /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
1493       what happens if a thread finishes before they all start? BAD!
1494      what happens if a thread finishes before any else start? BAD! */
1495     pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
1496     /* reset job */
1497     if(PetscThreadGo) {
1498       pthread_mutex_lock(&job_true.mutex);
1499       job_true.iNumReadyThreads++;
1500       if(job_true.iNumReadyThreads==PetscMaxThreads) {
1501 	/* signal the 'main' thread that the job is done! (only done once) */
1502 	ierr = pthread_cond_signal(&main_cond);
1503       }
1504     }
1505   }
1506   return NULL;
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "PetscThreadInitialize_True"
1511 void* PetscThreadInitialize_True(PetscInt N) {
1512   PetscInt i;
1513   int status;
1514 
1515   if(PetscUseThreadPool) {
1516     pVal = (int*)malloc(N*sizeof(int));
1517     /* allocate memory in the heap for the thread structure */
1518     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1519     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1520     job_true.pdata = (void**)malloc(N*sizeof(void*));
1521     for(i=0; i<N; i++) {
1522       pVal[i] = i;
1523       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1524       /* error check to ensure proper thread creation */
1525       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1526       /* should check error */
1527     }
1528   }
1529   else {
1530   }
1531   return NULL;
1532 }
1533 
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "PetscThreadFinalize_True"
1537 PetscErrorCode PetscThreadFinalize_True() {
1538   int i,ierr;
1539   void* jstatus;
1540 
1541   PetscFunctionBegin;
1542 
1543   if(PetscUseThreadPool) {
1544     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1545     /* join the threads */
1546     for(i=0; i<PetscMaxThreads; i++) {
1547       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1548       /* should check error */
1549     }
1550     free(BarrPoint);
1551     free(PetscThreadPoint);
1552   }
1553   else {
1554   }
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MainWait_True"
1560 void MainWait_True() {
1561   int ierr;
1562   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1563     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1564   }
1565   ierr = pthread_mutex_unlock(&job_true.mutex);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MainJob_True"
1570 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1571   int ierr;
1572   PetscErrorCode ijoberr = 0;
1573 
1574   MainWait();
1575   job_true.pfunc = pFunc;
1576   job_true.pdata = data;
1577   job_true.pbarr = &BarrPoint[n];
1578   job_true.iNumJobThreads = n;
1579   job_true.startJob = PETSC_TRUE;
1580   ierr = pthread_cond_broadcast(&job_true.cond);
1581   if(pFunc!=FuncFinish) {
1582     MainWait(); /* why wait after? guarantees that job gets done */
1583   }
1584 
1585   if(ithreaderr) {
1586     ijoberr = ithreaderr;
1587   }
1588   return ijoberr;
1589 }
1590 /**** NO THREAD POOL FUNCTION ****/
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MainJob_Spawn"
1593 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1594   PetscErrorCode ijoberr = 0;
1595 
1596   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1597   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1598   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1599   free(apThread);
1600 
1601   return ijoberr;
1602 }
1603 /****  ****/
1604 #endif
1605 
1606 void* FuncFinish(void* arg) {
1607   PetscThreadGo = PETSC_FALSE;
1608   return(0);
1609 }
1610 
1611 #endif
1612