xref: /petsc/src/sys/objects/init.c (revision 73260a9bf5f079fa3cde3d9bd07e3ae1f3545c60)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 //#if defined(PETSC_HAVE_SCHED_H) && defined(PETSC_USE_PTHREAD)
9 #ifndef _GNU_SOURCE
10 #define _GNU_SOURCE
11 #endif
12 #include <sched.h>
13 //#endif
14 #include <petscsys.h>        /*I  "petscsys.h"   I*/
15 #if defined(PETSC_USE_PTHREAD)
16 #include <pthread.h>
17 #endif
18 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
19 #include <sys/sysinfo.h>
20 #endif
21 #include <unistd.h>
22 #if defined(PETSC_HAVE_STDLIB_H)
23 #include <stdlib.h>
24 #endif
25 #if defined(PETSC_HAVE_MALLOC_H)
26 #include <malloc.h>
27 #endif
28 #if defined(PETSC_HAVE_VALGRIND)
29 #include <valgrind/valgrind.h>
30 #endif
31 
32 /* ------------------------Nasty global variables -------------------------------*/
33 /*
34      Indicates if PETSc started up MPI, or it was
35    already started before PETSc was initialized.
36 */
37 PetscBool    PetscBeganMPI         = PETSC_FALSE;
38 PetscBool    PetscInitializeCalled = PETSC_FALSE;
39 PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
40 PetscBool    PetscUseThreadPool    = PETSC_FALSE;
41 PetscBool    PetscThreadGo         = PETSC_TRUE;
42 PetscMPIInt  PetscGlobalRank = -1;
43 PetscMPIInt  PetscGlobalSize = -1;
44 
45 #if defined(PETSC_HAVE_PTHREADCLASSES)
46 PetscMPIInt  PetscMaxThreads = 2;
47 pthread_t*   PetscThreadPoint;
48 #define PETSC_HAVE_PTHREAD_BARRIER
49 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
50 pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
51 #endif
52 PetscErrorCode ithreaderr = 0;
53 int*         pVal;
54 
55 #define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
56 int* ThreadCoreAffinity;
57 
/* job progress states; used by 'chain','tree' thread pool */
typedef enum {
  JobInitiated,
  ThreadsWorking,
  JobCompleted
} estat;
59 
60 typedef struct {
61   pthread_mutex_t** mutexarray;
62   pthread_cond_t**  cond1array;
63   pthread_cond_t** cond2array;
64   void* (*pfunc)(void*);
65   void** pdata;
66   PetscBool startJob;
67   estat eJobStat;
68   PetscBool** arrThreadStarted;
69   PetscBool** arrThreadReady;
70 } sjob_tree;
71 sjob_tree job_tree;
72 typedef struct {
73   pthread_mutex_t** mutexarray;
74   pthread_cond_t**  cond1array;
75   pthread_cond_t** cond2array;
76   void* (*pfunc)(void*);
77   void** pdata;
78   PetscBool** arrThreadReady;
79 } sjob_main;
80 sjob_main job_main;
81 typedef struct {
82   pthread_mutex_t** mutexarray;
83   pthread_cond_t**  cond1array;
84   pthread_cond_t** cond2array;
85   void* (*pfunc)(void*);
86   void** pdata;
87   PetscBool startJob;
88   estat eJobStat;
89   PetscBool** arrThreadStarted;
90   PetscBool** arrThreadReady;
91 } sjob_chain;
92 sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/*
   Job record with one global instance, job_true (presumably for the 'true'
   thread pool -- confirm against the pool implementation). Unlike the other
   job records it holds a single mutex/condition pair by value plus a barrier
   pointer. The member order matters: job_true is initialized positionally.
*/
typedef struct {
  pthread_mutex_t   mutex;
  pthread_cond_t    cond;
  void              *(*pfunc)(void*);
  void              **pdata;
  pthread_barrier_t *pbarr;
  int               iNumJobThreads;
  int               iNumReadyThreads;
  PetscBool         startJob;
} sjob_true;
sjob_true job_true = {
  PTHREAD_MUTEX_INITIALIZER,  /* mutex            */
  PTHREAD_COND_INITIALIZER,   /* cond             */
  NULL,                       /* pfunc            */
  NULL,                       /* pdata            */
  NULL,                       /* pbarr            */
  0,                          /* iNumJobThreads   */
  0,                          /* iNumReadyThreads */
  PETSC_FALSE                 /* startJob         */
};
#endif

106 
/* used by 'true', 'chain','tree' thread pools */
pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER;

/* raw byte buffers, all used by 'chain','main','tree' thread pools */
char *arrmutex;
char *arrcond1;
char *arrcond2;
char *arrstart;
char *arrready;
113 
114 /* Function Pointers */
115 void*          (*PetscThreadFunc)(void*) = NULL;
116 void*          (*PetscThreadInitialize)(PetscInt) = NULL;
117 PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
118 void           (*MainWait)(void) = NULL;
119 PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
120 /**** Tree Thread Pool Functions ****/
121 void*          PetscThreadFunc_Tree(void*);
122 void*          PetscThreadInitialize_Tree(PetscInt);
123 PetscErrorCode PetscThreadFinalize_Tree(void);
124 void           MainWait_Tree(void);
125 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
126 /**** Main Thread Pool Functions ****/
127 void*          PetscThreadFunc_Main(void*);
128 void*          PetscThreadInitialize_Main(PetscInt);
129 PetscErrorCode PetscThreadFinalize_Main(void);
130 void           MainWait_Main(void);
131 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
132 /**** Chain Thread Pool Functions ****/
133 void*          PetscThreadFunc_Chain(void*);
134 void*          PetscThreadInitialize_Chain(PetscInt);
135 PetscErrorCode PetscThreadFinalize_Chain(void);
136 void           MainWait_Chain(void);
137 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
138 /**** True Thread Pool Functions ****/
139 void*          PetscThreadFunc_True(void*);
140 void*          PetscThreadInitialize_True(PetscInt);
141 PetscErrorCode PetscThreadFinalize_True(void);
142 void           MainWait_True(void);
143 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
144 /**** NO Thread Pool Function  ****/
145 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);
146 /****  ****/
147 void* FuncFinish(void*);
148 void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
149 void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
150 #endif
151 
152 #if defined(PETSC_USE_COMPLEX)
153 #if defined(PETSC_COMPLEX_INSTANTIATE)
154 template <> class std::complex<double>; /* instantiate complex template class */
155 #endif
156 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
157 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
158 MPI_Datatype   MPI_C_COMPLEX;
159 #endif
160 PetscScalar    PETSC_i;
161 #else
162 PetscScalar    PETSC_i = 0.0;
163 #endif
164 #if defined(PETSC_USE_REAL___FLOAT128)
165 MPI_Datatype   MPIU___FLOAT128 = 0;
166 #endif
167 MPI_Datatype   MPIU_2SCALAR = 0;
168 MPI_Datatype   MPIU_2INT = 0;
169 
170 /*
171      These are needed by petscbt.h
172 */
173 #include <petscbt.h>
174 char      _BT_mask = ' ';
175 char      _BT_c = ' ';
176 PetscInt  _BT_idx  = 0;
177 
178 /*
179        Function that is called to display all error messages
180 */
181 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
182 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
183 #if defined(PETSC_HAVE_MATLAB_ENGINE)
184 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
185 #else
186 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
187 #endif
188 /*
189   This is needed to turn on/off cusp synchronization */
190 PetscBool   synchronizeCUSP = PETSC_FALSE;
191 
192 /* ------------------------------------------------------------------------------*/
193 /*
194    Optional file where all PETSc output from various prints is saved
195 */
196 FILE *petsc_history = PETSC_NULL;
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "PetscOpenHistoryFile"
200 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
201 {
202   PetscErrorCode ierr;
203   PetscMPIInt    rank,size;
204   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
205   char           version[256];
206 
207   PetscFunctionBegin;
208   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
209   if (!rank) {
210     char        arch[10];
211     int         err;
212     PetscViewer viewer;
213 
214     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
215     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
216     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
217     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
218     if (filename) {
219       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
220     } else {
221       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
222       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
223       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
224     }
225 
226     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
227     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
228     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
229     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
230     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
231     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
232     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
233     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
234     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
235     err = fflush(*fd);
236     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
237   }
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "PetscCloseHistoryFile"
243 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
244 {
245   PetscErrorCode ierr;
246   PetscMPIInt    rank;
247   char           date[64];
248   int            err;
249 
250   PetscFunctionBegin;
251   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
252   if (!rank) {
253     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
254     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
255     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
256     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
257     err = fflush(*fd);
258     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
259     err = fclose(*fd);
260     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 /* ------------------------------------------------------------------------------*/
266 
/*
   This is ugly and probably belongs somewhere else, but I want to
  be able to put a true MPI abort error handler with command line args.

    This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits, thus providing no useful
  information in the debugger; hence we call abort() instead of MPI_Abort().
*/
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "Petsc_MPI_AbortOnError"
278 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
279 {
280   PetscFunctionBegin;
281   (*PetscErrorPrintf)("MPI error %d\n",*flag);
282   abort();
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
287 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
288 {
289   PetscErrorCode ierr;
290 
291   PetscFunctionBegin;
292   (*PetscErrorPrintf)("MPI error %d\n",*flag);
293   ierr = PetscAttachDebugger();
294   if (ierr) { /* hopeless so get out */
295     MPI_Abort(*comm,*flag);
296   }
297 }
298 
299 #undef __FUNCT__
300 #define __FUNCT__ "PetscEnd"
301 /*@C
302    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
303      wishes a clean exit somewhere deep in the program.
304 
305    Collective on PETSC_COMM_WORLD
306 
307    Options Database Keys are the same as for PetscFinalize()
308 
309    Level: advanced
310 
311    Note:
312    See PetscInitialize() for more general runtime options.
313 
314 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
315 @*/
316 PetscErrorCode  PetscEnd(void)
317 {
318   PetscFunctionBegin;
319   PetscFinalize();
320   exit(0);
321   return 0;
322 }
323 
324 PetscBool    PetscOptionsPublish = PETSC_FALSE;
325 extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
326 extern PetscBool  petscsetmallocvisited;
327 static char       emacsmachinename[256];
328 
329 PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
330 PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
331 
332 #undef __FUNCT__
333 #define __FUNCT__ "PetscSetHelpVersionFunctions"
334 /*@C
335    PetscSetHelpVersionFunctions - Sets functions that print help and version information
336    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
337    This routine enables a "higher-level" package that uses PETSc to print its messages first.
338 
339    Input Parameter:
340 +  help - the help function (may be PETSC_NULL)
341 -  version - the version function (may be PETSC_NULL)
342 
343    Level: developer
344 
345    Concepts: package help message
346 
347 @*/
348 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
349 {
350   PetscFunctionBegin;
351   PetscExternalHelpFunction    = help;
352   PetscExternalVersionFunction = version;
353   PetscFunctionReturn(0);
354 }
355 
356 #undef __FUNCT__
357 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
358 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
359 {
360   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
361   MPI_Comm       comm = PETSC_COMM_WORLD;
362   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
363   PetscErrorCode ierr;
364   PetscReal      si;
365   int            i;
366   PetscMPIInt    rank;
367   char           version[256];
368 
369   PetscFunctionBegin;
370   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
371 
372   /*
373       Setup the memory management; support for tracing malloc() usage
374   */
375   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
376 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
377   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
378   if ((!flg2 || flg1) && !petscsetmallocvisited) {
379 #if defined(PETSC_HAVE_VALGRIND)
380     if (flg2 || !(RUNNING_ON_VALGRIND)) {
381       /* turn off default -malloc if valgrind is being used */
382 #endif
383       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
384 #if defined(PETSC_HAVE_VALGRIND)
385     }
386 #endif
387   }
388 #else
389   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
390   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
391   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
392 #endif
393   if (flg3) {
394     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
395   }
396   flg1 = PETSC_FALSE;
397   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
398   if (flg1) {
399     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
400     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
401   }
402 
403   flg1 = PETSC_FALSE;
404   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
405   if (!flg1) {
406     flg1 = PETSC_FALSE;
407     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
408   }
409   if (flg1) {
410     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
411   }
412 
413   /*
414       Set the display variable for graphics
415   */
416   ierr = PetscSetDisplay();CHKERRQ(ierr);
417 
418 
419   /*
420       Print the PETSc version information
421   */
422   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
423   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
424   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
425   if (flg1 || flg2 || flg3){
426 
427     /*
428        Print "higher-level" package version message
429     */
430     if (PetscExternalVersionFunction) {
431       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
432     }
433 
434     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
435     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
436 ------------------------------\n");CHKERRQ(ierr);
437     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
441     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
442     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
443     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
444 ------------------------------\n");CHKERRQ(ierr);
445   }
446 
447   /*
448        Print "higher-level" package help message
449   */
450   if (flg3){
451     if (PetscExternalHelpFunction) {
452       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
453     }
454   }
455 
456   /*
457       Setup the error handling
458   */
459   flg1 = PETSC_FALSE;
460   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
461   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
462   flg1 = PETSC_FALSE;
463   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
464   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
465   flg1 = PETSC_FALSE;
466   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
467   if (flg1) {
468     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
469   }
470   flg1 = PETSC_FALSE;
471   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
472   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
473   flg1 = PETSC_FALSE;
474   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
475   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
476 
477   /*
478       Setup debugger information
479   */
480   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
481   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
482   if (flg1) {
483     MPI_Errhandler err_handler;
484 
485     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
486     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
487     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
488     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
489   }
490   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
491   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
492   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
493   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
494   if (flg1 || flg2) {
495     PetscMPIInt    size;
496     PetscInt       lsize,*nodes;
497     MPI_Errhandler err_handler;
498     /*
499        we have to make sure that all processors have opened
500        connections to all other processors, otherwise once the
501        debugger has stated it is likely to receive a SIGUSR1
502        and kill the program.
503     */
504     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
505     if (size > 2) {
506       PetscMPIInt dummy = 0;
507       MPI_Status  status;
508       for (i=0; i<size; i++) {
509         if (rank != i) {
510           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
511         }
512       }
513       for (i=0; i<size; i++) {
514         if (rank != i) {
515           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
516         }
517       }
518     }
519     /* check if this processor node should be in debugger */
520     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
521     lsize = size;
522     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
523     if (flag) {
524       for (i=0; i<lsize; i++) {
525         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
526       }
527     }
528     if (!flag) {
529       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
530       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
531       if (flg1) {
532         ierr = PetscAttachDebugger();CHKERRQ(ierr);
533       } else {
534         ierr = PetscStopForDebugger();CHKERRQ(ierr);
535       }
536       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
537       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
538     }
539     ierr = PetscFree(nodes);CHKERRQ(ierr);
540   }
541 
542   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
543   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
544 
545 #if defined(PETSC_USE_SOCKET_VIEWER)
546   /*
547     Activates new sockets for zope if needed
548   */
549   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
550   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
551   if (flgz){
552     int  sockfd;
553     char hostname[256];
554     char username[256];
555     int  remoteport = 9999;
556 
557     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
558     if (!hostname[0]){
559       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
560     }
561     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
562     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
563     PETSC_ZOPEFD = fdopen(sockfd, "w");
564     if (flgzout){
565       PETSC_STDOUT = PETSC_ZOPEFD;
566       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
567       fprintf(PETSC_STDOUT, "<<<start>>>");
568     } else {
569       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
570       fprintf(PETSC_ZOPEFD, "<<<start>>>");
571     }
572   }
573 #endif
574 #if defined(PETSC_USE_SERVER)
575   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
576   if (flgz){
577     PetscInt port = PETSC_DECIDE;
578     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
579     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
580   }
581 #endif
582 
583   /*
584         Setup profiling and logging
585   */
586 #if defined (PETSC_USE_INFO)
587   {
588     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
589     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
590     if (flg1 && logname[0]) {
591       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
592     } else if (flg1) {
593       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
594     }
595   }
596 #endif
597 #if defined(PETSC_USE_LOG)
598   mname[0] = 0;
599   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
600   if (flg1) {
601     if (mname[0]) {
602       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
603     } else {
604       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
605     }
606   }
607 #if defined(PETSC_HAVE_MPE)
608   flg1 = PETSC_FALSE;
609   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
610   if (flg1) PetscLogMPEBegin();
611 #endif
612   flg1 = PETSC_FALSE;
613   flg2 = PETSC_FALSE;
614   flg3 = PETSC_FALSE;
615   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
616   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
617   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
618   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
619   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
620   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
621 
622   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
623   if (flg1) {
624     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
625     FILE *file;
626     if (mname[0]) {
627       sprintf(name,"%s.%d",mname,rank);
628       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
629       file = fopen(fname,"w");
630       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
631     } else {
632       file = PETSC_STDOUT;
633     }
634     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
635   }
636 #endif
637 
638   /*
639       Setup building of stack frames for all function calls
640   */
641 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
642   ierr = PetscStackCreate();CHKERRQ(ierr);
643 #endif
644 
645   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
646 
647 #if defined(PETSC_HAVE_PTHREADCLASSES)
648   /*
649       Determine whether user specified maximum number of threads
650    */
651   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
652 
653   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
654   if(flg1) {
655     cpu_set_t mset;
656     int icorr,ncorr = get_nprocs();
657     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
658     CPU_ZERO(&mset);
659     CPU_SET(icorr%ncorr,&mset);
660     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
661   }
662 
663   PetscInt N_CORES = get_nprocs();
664   ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
665   char tstr[9];
666   char tbuf[2];
667   strcpy(tstr,"-thread");
668   for(i=0;i<PetscMaxThreads;i++) {
669     ThreadCoreAffinity[i] = i;
670     sprintf(tbuf,"%d",i);
671     strcat(tstr,tbuf);
672     ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
673     if(flg1) {
674       ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
675       ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
676     }
677     tstr[7] = '\0';
678   }
679 
680   /*
681       Determine whether to use thread pool
682    */
683   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
684   if (flg1) {
685     PetscUseThreadPool = PETSC_TRUE;
686     /* get the thread pool type */
687     PetscInt ipool = 0;
688     const char *choices[4] = {"true","tree","main","chain"};
689 
690     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
691     switch(ipool) {
692     case 1:
693       PetscThreadFunc       = &PetscThreadFunc_Tree;
694       PetscThreadInitialize = &PetscThreadInitialize_Tree;
695       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
696       MainWait              = &MainWait_Tree;
697       MainJob               = &MainJob_Tree;
698       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
699       break;
700     case 2:
701       PetscThreadFunc       = &PetscThreadFunc_Main;
702       PetscThreadInitialize = &PetscThreadInitialize_Main;
703       PetscThreadFinalize   = &PetscThreadFinalize_Main;
704       MainWait              = &MainWait_Main;
705       MainJob               = &MainJob_Main;
706       PetscInfo(PETSC_NULL,"Using main thread pool\n");
707       break;
708 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
709     case 3:
710 #else
711     default:
712 #endif
713       PetscThreadFunc       = &PetscThreadFunc_Chain;
714       PetscThreadInitialize = &PetscThreadInitialize_Chain;
715       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
716       MainWait              = &MainWait_Chain;
717       MainJob               = &MainJob_Chain;
718       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
719       break;
720 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
721     default:
722       PetscThreadFunc       = &PetscThreadFunc_True;
723       PetscThreadInitialize = &PetscThreadInitialize_True;
724       PetscThreadFinalize   = &PetscThreadFinalize_True;
725       MainWait              = &MainWait_True;
726       MainJob               = &MainJob_True;
727       PetscInfo(PETSC_NULL,"Using true thread pool\n");
728       break;
729 #endif
730     }
731     PetscThreadInitialize(PetscMaxThreads);
732   } else {
733     //need to define these in the case on 'no threads' or 'thread create/destroy'
734     //could take any of the above versions
735     MainJob               = &MainJob_Spawn;
736   }
737 #endif
738   /*
739        Print basic help message
740   */
741   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
742   if (flg1) {
743     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
777 #if defined(PETSC_USE_LOG)
778     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
779     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
780     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
781 #if defined(PETSC_HAVE_MPE)
782     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
783 #endif
784     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
785 #endif
786     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
787     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
788     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
789     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
790   }
791 
792   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
793   if (flg1) {
794     ierr = PetscSleep(si);CHKERRQ(ierr);
795   }
796 
797   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
798   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
799   if (f) {
800     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
801   }
802 
803 #if defined(PETSC_HAVE_CUSP)
804   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
805   if (flg3) flg1 = PETSC_TRUE;
806   else flg1 = PETSC_FALSE;
807   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
808   if (flg1) synchronizeCUSP = PETSC_TRUE;
809 #endif
810 
811   PetscFunctionReturn(0);
812 }
813 
814 #if defined(PETSC_HAVE_PTHREADCLASSES)
815 
816 /**** 'Tree' Thread Pool Functions ****/
817 void* PetscThreadFunc_Tree(void* arg) {
818   PetscErrorCode iterr;
819   int icorr,ierr;
820   int* pId = (int*)arg;
821   int ThreadId = *pId,Mary = 2,i,SubWorker;
822   PetscBool PeeOn;
823   cpu_set_t mset;
824   //printf("Thread %d In Tree Thread Function\n",ThreadId);
825   icorr = ThreadCoreAffinity[ThreadId];
826   CPU_ZERO(&mset);
827   CPU_SET(icorr,&mset);
828   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
829 
830   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
831     PeeOn = PETSC_TRUE;
832   }
833   else {
834     PeeOn = PETSC_FALSE;
835   }
836   if(PeeOn==PETSC_FALSE) {
837     /* check your subordinates, wait for them to be ready */
838     for(i=1;i<=Mary;i++) {
839       SubWorker = Mary*ThreadId+i;
840       if(SubWorker<PetscMaxThreads) {
841         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
842         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
843           /* upon entry, automically releases the lock and blocks
844            upon return, has the lock */
845           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
846         }
847         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
848       }
849     }
850     /* your subordinates are now ready */
851   }
852   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
853   /* update your ready status */
854   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
855   if(ThreadId==0) {
856     job_tree.eJobStat = JobCompleted;
857     /* ignal main */
858     ierr = pthread_cond_signal(&main_cond);
859   }
860   else {
861     /* tell your boss that you're ready to work */
862     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
863   }
864   /* the while loop needs to have an exit
865   the 'main' thread can terminate all the threads by performing a broadcast
866    and calling FuncFinish */
867   while(PetscThreadGo) {
868     /*need to check the condition to ensure we don't have to wait
869       waiting when you don't have to causes problems
870      also need to check the condition to ensure proper handling of spurious wakeups */
871     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
872       /* upon entry, automically releases the lock and blocks
873        upon return, has the lock */
874         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
875 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
876 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
877     }
878     if(ThreadId==0) {
879       job_tree.startJob = PETSC_FALSE;
880       job_tree.eJobStat = ThreadsWorking;
881     }
882     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
883     if(PeeOn==PETSC_FALSE) {
884       /* tell your subordinates it's time to get to work */
885       for(i=1; i<=Mary; i++) {
886 	SubWorker = Mary*ThreadId+i;
887         if(SubWorker<PetscMaxThreads) {
888           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
889         }
890       }
891     }
892     /* do your job */
893     if(job_tree.pdata==NULL) {
894       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
895     }
896     else {
897       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
898     }
899     if(iterr!=0) {
900       ithreaderr = 1;
901     }
902     if(PetscThreadGo) {
903       /* reset job, get ready for more */
904       if(PeeOn==PETSC_FALSE) {
905         /* check your subordinates, waiting for them to be ready
906          how do you know for a fact that a given subordinate has actually started? */
907 	for(i=1;i<=Mary;i++) {
908 	  SubWorker = Mary*ThreadId+i;
909           if(SubWorker<PetscMaxThreads) {
910             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
911             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
912               /* upon entry, automically releases the lock and blocks
913                upon return, has the lock */
914               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
915             }
916             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
917           }
918 	}
919         /* your subordinates are now ready */
920       }
921       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
922       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
923       if(ThreadId==0) {
924 	job_tree.eJobStat = JobCompleted; /* oot thread: last thread to complete, guaranteed! */
925         /* root thread signals 'main' */
926         ierr = pthread_cond_signal(&main_cond);
927       }
928       else {
929         /* signal your boss before you go to sleep */
930         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
931       }
932     }
933   }
934   return NULL;
935 }
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "PetscThreadInitialize_Tree"
939 void* PetscThreadInitialize_Tree(PetscInt N) {
940   PetscInt i,ierr;
941   int status;
942 
943   if(PetscUseThreadPool) {
944     size_t Val1 = (size_t)CACHE_LINE_SIZE;
945     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
946     arrmutex = (char*)memalign(Val1,Val2);
947     arrcond1 = (char*)memalign(Val1,Val2);
948     arrcond2 = (char*)memalign(Val1,Val2);
949     arrstart = (char*)memalign(Val1,Val2);
950     arrready = (char*)memalign(Val1,Val2);
951     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
952     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
953     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
954     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
955     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
956     /* initialize job structure */
957     for(i=0; i<PetscMaxThreads; i++) {
958       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
959       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
960       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
961       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
962       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
963     }
964     for(i=0; i<PetscMaxThreads; i++) {
965       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
966       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
967       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
968       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
969       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
970     }
971     job_tree.pfunc = NULL;
972     job_tree.pdata = (void**)malloc(N*sizeof(void*));
973     job_tree.startJob = PETSC_FALSE;
974     job_tree.eJobStat = JobInitiated;
975     pVal = (int*)malloc(N*sizeof(int));
976     /* allocate memory in the heap for the thread structure */
977     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
978     /* create threads */
979     for(i=0; i<N; i++) {
980       pVal[i] = i;
981       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
982       /* should check status */
983     }
984   }
985   return NULL;
986 }
987 
988 #undef __FUNCT__
989 #define __FUNCT__ "PetscThreadFinalize_Tree"
990 PetscErrorCode PetscThreadFinalize_Tree() {
991   int i,ierr;
992   void* jstatus;
993 
994   PetscFunctionBegin;
995 
996   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
997   /* join the threads */
998   for(i=0; i<PetscMaxThreads; i++) {
999     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1000     /* do error checking*/
1001   }
1002   free(PetscThreadPoint);
1003   free(arrmutex);
1004   free(arrcond1);
1005   free(arrcond2);
1006   free(arrstart);
1007   free(arrready);
1008   free(job_tree.pdata);
1009   free(pVal);
1010 
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MainWait_Tree"
1016 void MainWait_Tree() {
1017   int ierr;
1018   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1019   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1020     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1021   }
1022   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MainJob_Tree"
1027 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1028   int i,ierr;
1029   PetscErrorCode ijoberr = 0;
1030 
1031   MainWait();
1032   job_tree.pfunc = pFunc;
1033   job_tree.pdata = data;
1034   job_tree.startJob = PETSC_TRUE;
1035   for(i=0; i<PetscMaxThreads; i++) {
1036     *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1037   }
1038   job_tree.eJobStat = JobInitiated;
1039   ierr = pthread_cond_signal(job_tree.cond2array[0]);
1040   if(pFunc!=FuncFinish) {
1041     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1042   }
1043 
1044   if(ithreaderr) {
1045     ijoberr = ithreaderr;
1046   }
1047   return ijoberr;
1048 }
1049 /****  ****/
1050 
1051 /**** 'Main' Thread Pool Functions ****/
1052 void* PetscThreadFunc_Main(void* arg) {
1053   PetscErrorCode iterr;
1054   int icorr,ierr;
1055   int* pId = (int*)arg;
1056   int ThreadId = *pId;
1057   cpu_set_t mset;
1058   //printf("Thread %d In Main Thread Function\n",ThreadId);
1059   icorr = ThreadCoreAffinity[ThreadId];
1060   CPU_ZERO(&mset);
1061   CPU_SET(icorr,&mset);
1062   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1063 
1064   ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1065   /* update your ready status */
1066   *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1067   /* tell the BOSS that you're ready to work before you go to sleep */
1068   ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1069 
1070   /* the while loop needs to have an exit
1071      the 'main' thread can terminate all the threads by performing a broadcast
1072      and calling FuncFinish */
1073   while(PetscThreadGo) {
1074     /* need to check the condition to ensure we don't have to wait
1075        waiting when you don't have to causes problems
1076      also need to check the condition to ensure proper handling of spurious wakeups */
1077     while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
1078       /* upon entry, atomically releases the lock and blocks
1079        upon return, has the lock */
1080         ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
1081 	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
1082     }
1083     ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
1084     if(job_main.pdata==NULL) {
1085       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
1086     }
1087     else {
1088       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
1089     }
1090     if(iterr!=0) {
1091       ithreaderr = 1;
1092     }
1093     if(PetscThreadGo) {
1094       /* reset job, get ready for more */
1095       ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1096       *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1097       /* tell the BOSS that you're ready to work before you go to sleep */
1098       ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1099     }
1100   }
1101   return NULL;
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "PetscThreadInitialize_Main"
1106 void* PetscThreadInitialize_Main(PetscInt N) {
1107   PetscInt i,ierr;
1108   int status;
1109 
1110   if(PetscUseThreadPool) {
1111     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1112     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1113     arrmutex = (char*)memalign(Val1,Val2);
1114     arrcond1 = (char*)memalign(Val1,Val2);
1115     arrcond2 = (char*)memalign(Val1,Val2);
1116     arrstart = (char*)memalign(Val1,Val2);
1117     arrready = (char*)memalign(Val1,Val2);
1118     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1119     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1120     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1121     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1122     /* initialize job structure */
1123     for(i=0; i<PetscMaxThreads; i++) {
1124       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1125       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1126       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1127       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1128     }
1129     for(i=0; i<PetscMaxThreads; i++) {
1130       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1131       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1132       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1133       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1134     }
1135     job_main.pfunc = NULL;
1136     job_main.pdata = (void**)malloc(N*sizeof(void*));
1137     pVal = (int*)malloc(N*sizeof(int));
1138     /* allocate memory in the heap for the thread structure */
1139     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1140     /* create threads */
1141     for(i=0; i<N; i++) {
1142       pVal[i] = i;
1143       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1144       /* error check */
1145     }
1146   }
1147   else {
1148   }
1149   return NULL;
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "PetscThreadFinalize_Main"
1154 PetscErrorCode PetscThreadFinalize_Main() {
1155   int i,ierr;
1156   void* jstatus;
1157 
1158   PetscFunctionBegin;
1159 
1160   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1161   /* join the threads */
1162   for(i=0; i<PetscMaxThreads; i++) {
1163     ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1164   }
1165   free(PetscThreadPoint);
1166   free(arrmutex);
1167   free(arrcond1);
1168   free(arrcond2);
1169   free(arrstart);
1170   free(arrready);
1171   free(job_main.pdata);
1172   free(pVal);
1173 
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MainWait_Main"
1179 void MainWait_Main() {
1180   int i,ierr;
1181   for(i=0; i<PetscMaxThreads; i++) {
1182     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1183     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1184       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1185     }
1186     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1187   }
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MainJob_Main"
1192 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1193   int i,ierr;
1194   PetscErrorCode ijoberr = 0;
1195 
1196   MainWait(); /* you know everyone is waiting to be signalled! */
1197   job_main.pfunc = pFunc;
1198   job_main.pdata = data;
1199   for(i=0; i<PetscMaxThreads; i++) {
1200     *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1201   }
1202   /* tell the threads to go to work */
1203   for(i=0; i<PetscMaxThreads; i++) {
1204     ierr = pthread_cond_signal(job_main.cond2array[i]);
1205   }
1206   if(pFunc!=FuncFinish) {
1207     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1208   }
1209 
1210   if(ithreaderr) {
1211     ijoberr = ithreaderr;
1212   }
1213   return ijoberr;
1214 }
1215 /****  ****/
1216 
1217 /**** Chain Thread Functions ****/
1218 void* PetscThreadFunc_Chain(void* arg) {
1219   PetscErrorCode iterr;
1220   int icorr,ierr;
1221   int* pId = (int*)arg;
1222   int ThreadId = *pId;
1223   int SubWorker = ThreadId + 1;
1224   PetscBool PeeOn;
1225   cpu_set_t mset;
1226   //printf("Thread %d In Chain Thread Function\n",ThreadId);
1227   icorr = ThreadCoreAffinity[ThreadId];
1228   CPU_ZERO(&mset);
1229   CPU_SET(icorr,&mset);
1230   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1231 
1232   if(ThreadId==(PetscMaxThreads-1)) {
1233     PeeOn = PETSC_TRUE;
1234   }
1235   else {
1236     PeeOn = PETSC_FALSE;
1237   }
1238   if(PeeOn==PETSC_FALSE) {
1239     /* check your subordinate, wait for him to be ready */
1240     ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1241     while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
1242       /* upon entry, automically releases the lock and blocks
1243        upon return, has the lock */
1244       ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1245     }
1246     ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1247     /* your subordinate is now ready*/
1248   }
1249   ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1250   /* update your ready status */
1251   *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1252   if(ThreadId==0) {
1253     job_chain.eJobStat = JobCompleted;
1254     /* signal main */
1255     ierr = pthread_cond_signal(&main_cond);
1256   }
1257   else {
1258     /* tell your boss that you're ready to work */
1259     ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1260   }
1261   /*  the while loop needs to have an exit
1262      the 'main' thread can terminate all the threads by performing a broadcast
1263    and calling FuncFinish */
1264   while(PetscThreadGo) {
1265     /* need to check the condition to ensure we don't have to wait
1266        waiting when you don't have to causes problems
1267      also need to check the condition to ensure proper handling of spurious wakeups */
1268     while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
1269       /*upon entry, automically releases the lock and blocks
1270        upon return, has the lock */
1271         ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
1272 	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
1273 	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
1274     }
1275     if(ThreadId==0) {
1276       job_chain.startJob = PETSC_FALSE;
1277       job_chain.eJobStat = ThreadsWorking;
1278     }
1279     ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
1280     if(PeeOn==PETSC_FALSE) {
1281       /* tell your subworker it's time to get to work */
1282       ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
1283     }
1284     /* do your job */
1285     if(job_chain.pdata==NULL) {
1286       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
1287     }
1288     else {
1289       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
1290     }
1291     if(iterr!=0) {
1292       ithreaderr = 1;
1293     }
1294     if(PetscThreadGo) {
1295       /* reset job, get ready for more */
1296       if(PeeOn==PETSC_FALSE) {
1297         /* check your subordinate, wait for him to be ready
1298          how do you know for a fact that your subordinate has actually started? */
1299         ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1300         while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
1301           /* upon entry, automically releases the lock and blocks
1302            upon return, has the lock */
1303           ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1304         }
1305         ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1306         /* your subordinate is now ready */
1307       }
1308       ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1309       *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1310       if(ThreadId==0) {
1311 	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
1312         /* root thread (foreman) signals 'main' */
1313         ierr = pthread_cond_signal(&main_cond);
1314       }
1315       else {
1316         /* signal your boss before you go to sleep */
1317         ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1318       }
1319     }
1320   }
1321   return NULL;
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "PetscThreadInitialize_Chain"
1326 void* PetscThreadInitialize_Chain(PetscInt N) {
1327   PetscInt i,ierr;
1328   int status;
1329 
1330   if(PetscUseThreadPool) {
1331     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1332     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1333     arrmutex = (char*)memalign(Val1,Val2);
1334     arrcond1 = (char*)memalign(Val1,Val2);
1335     arrcond2 = (char*)memalign(Val1,Val2);
1336     arrstart = (char*)memalign(Val1,Val2);
1337     arrready = (char*)memalign(Val1,Val2);
1338     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1339     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1340     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1341     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1342     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1343     /* initialize job structure */
1344     for(i=0; i<PetscMaxThreads; i++) {
1345       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1346       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1347       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1348       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1349       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1350     }
1351     for(i=0; i<PetscMaxThreads; i++) {
1352       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1353       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1354       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1355       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1356       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1357     }
1358     job_chain.pfunc = NULL;
1359     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1360     job_chain.startJob = PETSC_FALSE;
1361     job_chain.eJobStat = JobInitiated;
1362     pVal = (int*)malloc(N*sizeof(int));
1363     /* allocate memory in the heap for the thread structure */
1364     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1365     /* create threads */
1366     for(i=0; i<N; i++) {
1367       pVal[i] = i;
1368       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1369       /* should check error */
1370     }
1371   }
1372   else {
1373   }
1374   return NULL;
1375 }
1376 
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "PetscThreadFinalize_Chain"
1380 PetscErrorCode PetscThreadFinalize_Chain() {
1381   int i,ierr;
1382   void* jstatus;
1383 
1384   PetscFunctionBegin;
1385 
1386   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1387   /* join the threads */
1388   for(i=0; i<PetscMaxThreads; i++) {
1389     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1390     /* should check error */
1391   }
1392   free(PetscThreadPoint);
1393   free(arrmutex);
1394   free(arrcond1);
1395   free(arrcond2);
1396   free(arrstart);
1397   free(arrready);
1398   free(job_chain.pdata);
1399   free(pVal);
1400 
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MainWait_Chain"
1406 void MainWait_Chain() {
1407   int ierr;
1408   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1409   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1410     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1411   }
1412   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MainJob_Chain"
1417 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1418   int i,ierr;
1419   PetscErrorCode ijoberr = 0;
1420 
1421   MainWait();
1422   job_chain.pfunc = pFunc;
1423   job_chain.pdata = data;
1424   job_chain.startJob = PETSC_TRUE;
1425   for(i=0; i<PetscMaxThreads; i++) {
1426     *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1427   }
1428   job_chain.eJobStat = JobInitiated;
1429   ierr = pthread_cond_signal(job_chain.cond2array[0]);
1430   if(pFunc!=FuncFinish) {
1431     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1432   }
1433 
1434   if(ithreaderr) {
1435     ijoberr = ithreaderr;
1436   }
1437   return ijoberr;
1438 }
1439 /****  ****/
1440 
1441 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1442 /**** True Thread Functions ****/
1443 void* PetscThreadFunc_True(void* arg) {
1444   int icorr,ierr,iVal;
1445   int* pId = (int*)arg;
1446   int ThreadId = *pId;
1447   PetscErrorCode iterr;
1448   cpu_set_t mset;
1449   //printf("Thread %d In True Pool Thread Function\n",ThreadId);
1450   icorr = ThreadCoreAffinity[ThreadId];
1451   CPU_ZERO(&mset);
1452   CPU_SET(icorr,&mset);
1453   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1454 
1455   ierr = pthread_mutex_lock(&job_true.mutex);
1456   job_true.iNumReadyThreads++;
1457   if(job_true.iNumReadyThreads==PetscMaxThreads) {
1458     ierr = pthread_cond_signal(&main_cond);
1459   }
1460   /*the while loop needs to have an exit
1461     the 'main' thread can terminate all the threads by performing a broadcast
1462    and calling FuncFinish */
1463   while(PetscThreadGo) {
1464     /*need to check the condition to ensure we don't have to wait
1465       waiting when you don't have to causes problems
1466      also need to wait if another thread sneaks in and messes with the predicate */
1467     while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
1468       /* upon entry, automically releases the lock and blocks
1469        upon return, has the lock */
1470       //printf("Thread %d Going to Sleep!\n",ThreadId);
1471       ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
1472     }
1473     job_true.startJob = PETSC_FALSE;
1474     job_true.iNumJobThreads--;
1475     job_true.iNumReadyThreads--;
1476     iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
1477     pthread_mutex_unlock(&job_true.mutex);
1478     if(job_true.pdata==NULL) {
1479       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
1480     }
1481     else {
1482       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
1483     }
1484     if(iterr!=0) {
1485       ithreaderr = 1;
1486     }
1487     //printf("Thread %d Finished Job\n",ThreadId);
1488     /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
1489       what happens if a thread finishes before they all start? BAD!
1490      what happens if a thread finishes before any else start? BAD! */
1491     pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
1492     /* reset job */
1493     if(PetscThreadGo) {
1494       pthread_mutex_lock(&job_true.mutex);
1495       job_true.iNumReadyThreads++;
1496       if(job_true.iNumReadyThreads==PetscMaxThreads) {
1497 	/* signal the 'main' thread that the job is done! (only done once) */
1498 	ierr = pthread_cond_signal(&main_cond);
1499       }
1500     }
1501   }
1502   return NULL;
1503 }
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "PetscThreadInitialize_True"
1507 void* PetscThreadInitialize_True(PetscInt N) {
1508   PetscInt i;
1509   int status;
1510 
1511   pVal = (int*)malloc(N*sizeof(int));
1512   /* allocate memory in the heap for the thread structure */
1513   PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1514   BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1515   job_true.pdata = (void**)malloc(N*sizeof(void*));
1516   for(i=0; i<N; i++) {
1517     pVal[i] = i;
1518     status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1519     /* error check to ensure proper thread creation */
1520     status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1521     /* should check error */
1522   }
1523   //printf("Finished True Thread Pool Initialization\n");
1524   return NULL;
1525 }
1526 
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "PetscThreadFinalize_True"
1530 PetscErrorCode PetscThreadFinalize_True() {
1531   int i,ierr;
1532   void* jstatus;
1533 
1534   PetscFunctionBegin;
1535 
1536   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1537   /* join the threads */
1538   for(i=0; i<PetscMaxThreads; i++) {
1539     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1540   }
1541   free(BarrPoint);
1542   free(PetscThreadPoint);
1543 
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MainWait_True"
1549 void MainWait_True() {
1550   int ierr;
1551   ierr = pthread_mutex_lock(&job_true.mutex);
1552   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1553     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1554   }
1555   ierr = pthread_mutex_unlock(&job_true.mutex);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MainJob_True"
1560 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1561   int ierr;
1562   PetscErrorCode ijoberr = 0;
1563 
1564   MainWait();
1565   job_true.pfunc = pFunc;
1566   job_true.pdata = data;
1567   job_true.pbarr = &BarrPoint[n];
1568   job_true.iNumJobThreads = n;
1569   job_true.startJob = PETSC_TRUE;
1570   ierr = pthread_cond_broadcast(&job_true.cond);
1571   if(pFunc!=FuncFinish) {
1572     MainWait(); /* why wait after? guarantees that job gets done */
1573   }
1574 
1575   if(ithreaderr) {
1576     ijoberr = ithreaderr;
1577   }
1578   return ijoberr;
1579 }
1580 /**** NO THREAD POOL FUNCTION ****/
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MainJob_Spawn"
1583 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1584   PetscErrorCode ijoberr = 0;
1585 
1586   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1587   PetscThreadPoint = apThread; /* point to same place */
1588   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1589   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1590   free(apThread);
1591 
1592   return ijoberr;
1593 }
1594 /****  ****/
1595 #endif
1596 
1597 void* FuncFinish(void* arg) {
1598   PetscThreadGo = PETSC_FALSE;
1599   return(0);
1600 }
1601 
1602 #endif
1603