xref: /petsc/src/sys/objects/init.c (revision 6f69dda14f4762423a4b810eb2fa239be89fccf7)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #include <petscsys.h>        /*I  "petscsys.h"   I*/
10 
11 #ifndef _GNU_SOURCE
12 #define _GNU_SOURCE
13 #endif
14 #if defined(PETSC_HAVE_SCHED_H)
15 #ifndef __USE_GNU
16 #define __USE_GNU
17 #endif
18 #include <sched.h>
19 #endif
20 #if defined(PETSC_HAVE_PTHREAD_H)
21 #include <pthread.h>
22 #endif
23 
24 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
25 #include <sys/sysinfo.h>
26 #endif
27 #if defined(PETSC_HAVE_UNISTD_H)
28 #include <unistd.h>
29 #endif
30 #if defined(PETSC_HAVE_STDLIB_H)
31 #include <stdlib.h>
32 #endif
33 #if defined(PETSC_HAVE_MALLOC_H)
34 #include <malloc.h>
35 #endif
36 #if defined(PETSC_HAVE_VALGRIND)
37 #include <valgrind/valgrind.h>
38 #endif
39 
40 /* ------------------------Nasty global variables -------------------------------*/
/*
     PetscBeganMPI indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool    PetscBeganMPI         = PETSC_FALSE;
/*
     NOTE(review): the next two flags are only defined here with their start-up
   values; the code that updates them is not in this part of the file.
*/
PetscBool    PetscInitializeCalled = PETSC_FALSE;
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
/* Set to PETSC_TRUE by PetscOptionsCheckInitial_Private() when -use_thread_pool is given (pthread-classes builds only) */
PetscBool    PetscUseThreadPool    = PETSC_FALSE;
/* NOTE(review): presumably tells pool threads to keep running; its readers are not visible here - confirm */
PetscBool    PetscThreadGo         = PETSC_TRUE;
/* Both start at -1; presumably replaced by the rank/size on PETSC_COMM_WORLD during initialization - TODO confirm */
PetscMPIInt  PetscGlobalRank = -1;
PetscMPIInt  PetscGlobalSize = -1;
52 
/*
     Global state, job descriptors and forward declarations for the pthread based
   thread pools. Everything in this section is compiled only when
   PETSC_HAVE_PTHREADCLASSES is defined.
*/
#if defined(PETSC_HAVE_PTHREADCLASSES)
/* Default of 2; PetscOptionsCheckInitial_Private() overrides it from -thread_max */
PetscMPIInt  PetscMaxThreads = 2;
pthread_t*   PetscThreadPoint;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode ithreaderr = 0;
int*         pVal;

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
/*
     Allocated and filled in PetscOptionsCheckInitial_Private() (when PETSC_HAVE_CPU_SET_T
   is defined): entry i defaults to i and can be overridden with -thread<i>.
*/
int* ThreadCoreAffinity;

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */

/*
     Job descriptor for the 'tree' thread pool: arrays of pointers to mutexes,
   condition variables and per-thread flags, plus the function and data of the
   current job. sjob_chain below has exactly the same fields.
*/
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* Job descriptor for the 'main' thread pool; like sjob_tree but without startJob, eJobStat and arrThreadStarted */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* Job descriptor for the 'chain' thread pool; field-for-field the same as sjob_tree */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/*
     Job descriptor for the 'true' thread pool: a single mutex/condition pair, a
   barrier pointer and thread counters. Only available with pthread barriers.
*/
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
/* Statically initialized: default mutex and condition, no job, zero counters, startJob false */
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
char* arrmutex; /* used by 'chain','main','tree' thread pools */
char* arrcond1; /* used by 'chain','main','tree' thread pools */
char* arrcond2; /* used by 'chain','main','tree' thread pools */
char* arrstart; /* used by 'chain','main','tree' thread pools */
char* arrready; /* used by 'chain','main','tree' thread pools */

/* Function Pointers */
/*
     These are bound in PetscOptionsCheckInitial_Private() to the implementation
   selected with -use_thread_pool (true, tree, main or chain). When that option
   is absent only MainJob is set, to MainJob_Spawn; the others stay NULL.
*/
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Thread Pool Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Thread Pool Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Thread Pool Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Thread Pool Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** NO Thread Pool Function  ****/
PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/
void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif
158 
/*
     Globals that depend on the scalar type PETSc was configured with.
   In complex builds PETSC_i is defined without an initializer (presumably it is
   assigned the imaginary unit during initialization - TODO confirm); in real
   builds it is defined as 0.0.
*/
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
/* PETSc defines its own complex MPI datatype handles only when PETSC_HAVE_MPI_C_DOUBLE_COMPLEX is not defined */
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
MPI_Datatype   MPIU_C_DOUBLE_COMPLEX;
MPI_Datatype   MPIU_C_COMPLEX;
#endif
PetscScalar    PETSC_i;
#else
PetscScalar    PETSC_i = 0.0;
#endif
/* MPI datatype handles defined with value 0 here; NOTE(review): presumably created during initialization - confirm */
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;
#endif
MPI_Datatype   MPIU_2SCALAR = 0;
MPI_Datatype   MPIU_2INT = 0;
176 
/*
     These are needed by petscbt.h

     NOTE(review): this file only provides the definitions and their initial
   values; how the header uses them is not visible here.
*/
#include <petscbt.h>
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;
184 
/*
       Replaceable print hooks, each initialized to its default implementation.

       PetscErrorPrintf - function that is called to display all error messages
       PetscHelpPrintf  - used in this file to print the version and help text
       PetscVFPrintf    - va_list based print to a FILE; defaults to the Matlab
                          variant when PETSC_HAVE_MATLAB_ENGINE is defined
*/
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
#if defined(PETSC_HAVE_MATLAB_ENGINE)
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
       This is needed to turn on/off cusp synchronization
*/
PetscBool   synchronizeCUSP = PETSC_FALSE;
198 
199 /* ------------------------------------------------------------------------------*/
/*
   Optional file where all PETSc output from various prints is saved.
   Null until PetscOptionsCheckInitial_Private() passes its address to
   PetscOpenHistoryFile() in response to the -history option.
*/
FILE *petsc_history = PETSC_NULL;
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "PetscOpenHistoryFile"
207 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
208 {
209   PetscErrorCode ierr;
210   PetscMPIInt    rank,size;
211   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
212   char           version[256];
213 
214   PetscFunctionBegin;
215   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
216   if (!rank) {
217     char        arch[10];
218     int         err;
219     PetscViewer viewer;
220 
221     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
222     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
223     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
224     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
225     if (filename) {
226       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
227     } else {
228       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
229       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
230       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
231     }
232 
233     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
234     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
235     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
236     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
237     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
238     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
239     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
240     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
241     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
242     err = fflush(*fd);
243     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
244   }
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "PetscCloseHistoryFile"
250 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
251 {
252   PetscErrorCode ierr;
253   PetscMPIInt    rank;
254   char           date[64];
255   int            err;
256 
257   PetscFunctionBegin;
258   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
259   if (!rank) {
260     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
261     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
262     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
263     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
264     err = fflush(*fd);
265     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
266     err = fclose(*fd);
267     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
268   }
269   PetscFunctionReturn(0);
270 }
271 
272 /* ------------------------------------------------------------------------------*/
273 
274 /*
275    This is ugly and probably belongs somewhere else, but I want to
276   be able to put a true MPI abort error handler with command line args.
277 
    This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits, thus providing no useful information
  in the debugger; hence we call abort() instead of MPI_Abort().
281 */
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "Petsc_MPI_AbortOnError"
285 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
286 {
287   PetscFunctionBegin;
288   (*PetscErrorPrintf)("MPI error %d\n",*flag);
289   abort();
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
294 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
295 {
296   PetscErrorCode ierr;
297 
298   PetscFunctionBegin;
299   (*PetscErrorPrintf)("MPI error %d\n",*flag);
300   ierr = PetscAttachDebugger();
301   if (ierr) { /* hopeless so get out */
302     MPI_Abort(*comm,*flag);
303   }
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "PetscEnd"
308 /*@C
309    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
310      wishes a clean exit somewhere deep in the program.
311 
312    Collective on PETSC_COMM_WORLD
313 
314    Options Database Keys are the same as for PetscFinalize()
315 
316    Level: advanced
317 
318    Note:
319    See PetscInitialize() for more general runtime options.
320 
321 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
322 @*/
323 PetscErrorCode  PetscEnd(void)
324 {
325   PetscFunctionBegin;
326   PetscFinalize();
327   exit(0);
328   return 0;
329 }
330 
/* Set from the -options_gui option in PetscOptionsCheckInitial_Private() */
PetscBool    PetscOptionsPublish = PETSC_FALSE;
/* Defined elsewhere; used by PetscOptionsCheckInitial_Private() when setting up the tracing malloc */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
extern PetscBool  petscsetmallocvisited;
/* Receives the value of -on_error_emacs and is handed to PetscPushErrorHandler() as the handler context */
static char       emacsmachinename[256];

/*
   Optional hooks installed with PetscSetHelpVersionFunctions(). When non-null,
   PetscOptionsCheckInitial_Private() calls the version hook before printing the
   PETSc version (-v, -version or -help) and the help hook when -help is given.
*/
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PetscSetHelpVersionFunctions"
341 /*@C
342    PetscSetHelpVersionFunctions - Sets functions that print help and version information
343    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
344    This routine enables a "higher-level" package that uses PETSc to print its messages first.
345 
346    Input Parameter:
347 +  help - the help function (may be PETSC_NULL)
348 -  version - the version function (may be PETSC_NULL)
349 
350    Level: developer
351 
352    Concepts: package help message
353 
354 @*/
355 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
356 {
357   PetscFunctionBegin;
358   PetscExternalHelpFunction    = help;
359   PetscExternalVersionFunction = version;
360   PetscFunctionReturn(0);
361 }
362 
#if defined(PETSC_HAVE_CPU_SET_T)
/*
   PetscPthreadSetAffinity - Asks the scheduler to run the calling thread on one
   core only.

   Input Parameter:
.  icorr - requested core; any integer is accepted and is wrapped into the range
           [0, number of cores reported by get_nprocs())

   Always returns 0. The request is best effort: the result of
   sched_setaffinity() is not examined, exactly as before.
*/
PETSC_STATIC_INLINE PetscErrorCode PetscPthreadSetAffinity(PetscInt icorr)
{
  cpu_set_t mset;
  int       ncorr = get_nprocs();
  int       core;

  /* never take a modulus by zero or by a negative count */
  if (ncorr < 1) ncorr = 1;
  /*
     In C the result of % has the sign of the dividend, so a negative request
     would give a negative, invalid index for CPU_SET(); wrap it back into range.
  */
  core = (int)(icorr%ncorr);
  if (core < 0) core += ncorr;

  CPU_ZERO(&mset);
  CPU_SET(core,&mset);
  /* pid 0 selects the calling thread */
  (void)sched_setaffinity(0,sizeof(cpu_set_t),&mset);
  return 0;
}
#endif
375 
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
379 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
380 {
381   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
382   MPI_Comm       comm = PETSC_COMM_WORLD;
383   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
384   PetscErrorCode ierr;
385   PetscReal      si;
386   int            i;
387   PetscMPIInt    rank;
388   char           version[256];
389 
390   PetscFunctionBegin;
391   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
392 
393   /*
394       Setup the memory management; support for tracing malloc() usage
395   */
396   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
397 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
398   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
399   if ((!flg2 || flg1) && !petscsetmallocvisited) {
400 #if defined(PETSC_HAVE_VALGRIND)
401     if (flg2 || !(RUNNING_ON_VALGRIND)) {
402       /* turn off default -malloc if valgrind is being used */
403 #endif
404       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
405 #if defined(PETSC_HAVE_VALGRIND)
406     }
407 #endif
408   }
409 #else
410   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
411   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
412   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
413 #endif
414   if (flg3) {
415     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
416   }
417   flg1 = PETSC_FALSE;
418   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
419   if (flg1) {
420     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
421     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
422   }
423 
424   flg1 = PETSC_FALSE;
425   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
426   if (!flg1) {
427     flg1 = PETSC_FALSE;
428     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
429   }
430   if (flg1) {
431     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
432   }
433 
434   /*
435       Set the display variable for graphics
436   */
437   ierr = PetscSetDisplay();CHKERRQ(ierr);
438 
439 
440   /*
441       Print the PETSc version information
442   */
443   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
444   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
445   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
446   if (flg1 || flg2 || flg3){
447 
448     /*
449        Print "higher-level" package version message
450     */
451     if (PetscExternalVersionFunction) {
452       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
453     }
454 
455     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
456     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
457 ------------------------------\n");CHKERRQ(ierr);
458     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
459     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
460     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
461     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
462     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
463     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
464     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
465 ------------------------------\n");CHKERRQ(ierr);
466   }
467 
468   /*
469        Print "higher-level" package help message
470   */
471   if (flg3){
472     if (PetscExternalHelpFunction) {
473       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
474     }
475   }
476 
477   /*
478       Setup the error handling
479   */
480   flg1 = PETSC_FALSE;
481   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
482   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
483   flg1 = PETSC_FALSE;
484   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
485   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
486   flg1 = PETSC_FALSE;
487   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
488   if (flg1) {
489     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
490   }
491   flg1 = PETSC_FALSE;
492   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
493   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
494   flg1 = PETSC_FALSE;
495   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
496   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
497 
498   /*
499       Setup debugger information
500   */
501   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
502   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
503   if (flg1) {
504     MPI_Errhandler err_handler;
505 
506     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
507     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
508     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
509     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
510   }
511   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
512   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
513   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
514   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
515   if (flg1 || flg2) {
516     PetscMPIInt    size;
517     PetscInt       lsize,*nodes;
518     MPI_Errhandler err_handler;
519     /*
520        we have to make sure that all processors have opened
521        connections to all other processors, otherwise once the
522        debugger has stated it is likely to receive a SIGUSR1
523        and kill the program.
524     */
525     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
526     if (size > 2) {
527       PetscMPIInt dummy = 0;
528       MPI_Status  status;
529       for (i=0; i<size; i++) {
530         if (rank != i) {
531           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
532         }
533       }
534       for (i=0; i<size; i++) {
535         if (rank != i) {
536           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
537         }
538       }
539     }
540     /* check if this processor node should be in debugger */
541     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
542     lsize = size;
543     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
544     if (flag) {
545       for (i=0; i<lsize; i++) {
546         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
547       }
548     }
549     if (!flag) {
550       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
551       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
552       if (flg1) {
553         ierr = PetscAttachDebugger();CHKERRQ(ierr);
554       } else {
555         ierr = PetscStopForDebugger();CHKERRQ(ierr);
556       }
557       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
558       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
559     }
560     ierr = PetscFree(nodes);CHKERRQ(ierr);
561   }
562 
563   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
564   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
565 
566 #if defined(PETSC_USE_SOCKET_VIEWER)
567   /*
568     Activates new sockets for zope if needed
569   */
570   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
571   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
572   if (flgz){
573     int  sockfd;
574     char hostname[256];
575     char username[256];
576     int  remoteport = 9999;
577 
578     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
579     if (!hostname[0]){
580       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
581     }
582     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
583     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
584     PETSC_ZOPEFD = fdopen(sockfd, "w");
585     if (flgzout){
586       PETSC_STDOUT = PETSC_ZOPEFD;
587       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
588       fprintf(PETSC_STDOUT, "<<<start>>>");
589     } else {
590       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
591       fprintf(PETSC_ZOPEFD, "<<<start>>>");
592     }
593   }
594 #endif
595 #if defined(PETSC_USE_SERVER)
596   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
597   if (flgz){
598     PetscInt port = PETSC_DECIDE;
599     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
600     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
601   }
602 #endif
603 
604   /*
605         Setup profiling and logging
606   */
607 #if defined (PETSC_USE_INFO)
608   {
609     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
610     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
611     if (flg1 && logname[0]) {
612       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
613     } else if (flg1) {
614       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
615     }
616   }
617 #endif
618 #if defined(PETSC_USE_LOG)
619   mname[0] = 0;
620   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
621   if (flg1) {
622     if (mname[0]) {
623       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
624     } else {
625       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
626     }
627   }
628 #if defined(PETSC_HAVE_MPE)
629   flg1 = PETSC_FALSE;
630   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
631   if (flg1) PetscLogMPEBegin();
632 #endif
633   flg1 = PETSC_FALSE;
634   flg2 = PETSC_FALSE;
635   flg3 = PETSC_FALSE;
636   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
637   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
638   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
639   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
640   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
641   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
642 
643   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
644   if (flg1) {
645     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
646     FILE *file;
647     if (mname[0]) {
648       sprintf(name,"%s.%d",mname,rank);
649       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
650       file = fopen(fname,"w");
651       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
652     } else {
653       file = PETSC_STDOUT;
654     }
655     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
656   }
657 #endif
658 
659   /*
660       Setup building of stack frames for all function calls
661   */
662 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
663   ierr = PetscStackCreate();CHKERRQ(ierr);
664 #endif
665 
666   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
667 
668 #if defined(PETSC_HAVE_PTHREADCLASSES)
669   /*
670       Determine whether user specified maximum number of threads
671    */
672   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
673 
674   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
675   if(flg1) {
676     PetscInt icorr;
677     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
678 #if defined(PETSC_HAVE_CPU_SET_T)
679     ierr = PetscPthreadSetAffinity(icorr);CHKERRQ(ierr);
680 #endif
681   }
682 
683 #if defined(PETSC_HAVE_CPU_SET_T)
684   PetscInt N_CORES = get_nprocs();
685   ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
686   char tstr[9];
687   char tbuf[2];
688   strcpy(tstr,"-thread");
689   for(i=0;i<PetscMaxThreads;i++) {
690     ThreadCoreAffinity[i] = i;
691     sprintf(tbuf,"%d",i);
692     strcat(tstr,tbuf);
693     ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
694     if(flg1) {
695       ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
696       ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
697     }
698     tstr[7] = '\0';
699   }
700 #endif
701 
702   /*
703       Determine whether to use thread pool
704    */
705   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
706   if (flg1) {
707     PetscUseThreadPool = PETSC_TRUE;
708     /* get the thread pool type */
709     PetscInt ipool = 0;
710     const char *choices[4] = {"true","tree","main","chain"};
711 
712     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
713     switch(ipool) {
714     case 1:
715       PetscThreadFunc       = &PetscThreadFunc_Tree;
716       PetscThreadInitialize = &PetscThreadInitialize_Tree;
717       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
718       MainWait              = &MainWait_Tree;
719       MainJob               = &MainJob_Tree;
720       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
721       break;
722     case 2:
723       PetscThreadFunc       = &PetscThreadFunc_Main;
724       PetscThreadInitialize = &PetscThreadInitialize_Main;
725       PetscThreadFinalize   = &PetscThreadFinalize_Main;
726       MainWait              = &MainWait_Main;
727       MainJob               = &MainJob_Main;
728       PetscInfo(PETSC_NULL,"Using main thread pool\n");
729       break;
730     case 3:
731       PetscThreadFunc       = &PetscThreadFunc_Chain;
732       PetscThreadInitialize = &PetscThreadInitialize_Chain;
733       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
734       MainWait              = &MainWait_Chain;
735       MainJob               = &MainJob_Chain;
736       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
737       break;
738 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
739     default:
740       PetscThreadFunc       = &PetscThreadFunc_True;
741       PetscThreadInitialize = &PetscThreadInitialize_True;
742       PetscThreadFinalize   = &PetscThreadFinalize_True;
743       MainWait              = &MainWait_True;
744       MainJob               = &MainJob_True;
745       PetscInfo(PETSC_NULL,"Using true thread pool\n");
746       break;
747 # else
748     default:
749       PetscThreadFunc       = &PetscThreadFunc_Chain;
750       PetscThreadInitialize = &PetscThreadInitialize_Chain;
751       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
752       MainWait              = &MainWait_Chain;
753       MainJob               = &MainJob_Chain;
754       PetscInfo(PETSC_NULL,"Cannot use true thread pool since pthread_barrier_t is not available,using chain thread pool instead\n");
755       break;
756 #endif
757     }
758     PetscThreadInitialize(PetscMaxThreads);
759   } else {
760     //need to define these in the case on 'no threads' or 'thread create/destroy'
761     //could take any of the above versions
762     MainJob               = &MainJob_Spawn;
763   }
764 #endif
765   /*
766        Print basic help message
767   */
768   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
769   if (flg1) {
770     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
777     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
778     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
779     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
780     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
781     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
782     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
783     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
784     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
785     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
786     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
787     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
788     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
789     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
790     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
791     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
792     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
793     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
794     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
795     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
796     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
797     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
798     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
799     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
800     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
801     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
802     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
803     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
804 #if defined(PETSC_USE_LOG)
805     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
806     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
807     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
808 #if defined(PETSC_HAVE_MPE)
809     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
810 #endif
811     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
812 #endif
813     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
814     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
815     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
816     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
817   }
818 
819   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
820   if (flg1) {
821     ierr = PetscSleep(si);CHKERRQ(ierr);
822   }
823 
824   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
825   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
826   if (f) {
827     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
828   }
829 
830 #if defined(PETSC_HAVE_CUSP)
831   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
832   if (flg3) flg1 = PETSC_TRUE;
833   else flg1 = PETSC_FALSE;
834   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
835   if (flg1) synchronizeCUSP = PETSC_TRUE;
836 #endif
837 
838   PetscFunctionReturn(0);
839 }
840 
841 #if defined(PETSC_HAVE_PTHREADCLASSES)
842 
843 /**** 'Tree' Thread Pool Functions ****/
844 void* PetscThreadFunc_Tree(void* arg) {
845   PetscErrorCode iterr;
846   int ierr;
847   int* pId = (int*)arg;
848   int ThreadId = *pId,Mary = 2,i,SubWorker;
849   PetscBool PeeOn;
850 #if defined(PETSC_HAVE_CPU_SET_T)
851   iterr = PetscPthreadSetAffinity(ThreadCoreAffinity[ThreadId]);CHKERRQ(iterr);
852 #endif
853   //printf("Thread %d In Tree Thread Function\n",ThreadId);
854 
855   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
856     PeeOn = PETSC_TRUE;
857   }
858   else {
859     PeeOn = PETSC_FALSE;
860   }
861   if(PeeOn==PETSC_FALSE) {
862     /* check your subordinates, wait for them to be ready */
863     for(i=1;i<=Mary;i++) {
864       SubWorker = Mary*ThreadId+i;
865       if(SubWorker<PetscMaxThreads) {
866         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
867         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
868           /* upon entry, automically releases the lock and blocks
869            upon return, has the lock */
870           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
871         }
872         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
873       }
874     }
875     /* your subordinates are now ready */
876   }
877   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
878   /* update your ready status */
879   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
880   if(ThreadId==0) {
881     job_tree.eJobStat = JobCompleted;
882     /* ignal main */
883     ierr = pthread_cond_signal(&main_cond);
884   }
885   else {
886     /* tell your boss that you're ready to work */
887     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
888   }
889   /* the while loop needs to have an exit
890   the 'main' thread can terminate all the threads by performing a broadcast
891    and calling FuncFinish */
892   while(PetscThreadGo) {
893     /*need to check the condition to ensure we don't have to wait
894       waiting when you don't have to causes problems
895      also need to check the condition to ensure proper handling of spurious wakeups */
896     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
897       /* upon entry, automically releases the lock and blocks
898        upon return, has the lock */
899         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
900 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
901 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
902     }
903     if(ThreadId==0) {
904       job_tree.startJob = PETSC_FALSE;
905       job_tree.eJobStat = ThreadsWorking;
906     }
907     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
908     if(PeeOn==PETSC_FALSE) {
909       /* tell your subordinates it's time to get to work */
910       for(i=1; i<=Mary; i++) {
911 	SubWorker = Mary*ThreadId+i;
912         if(SubWorker<PetscMaxThreads) {
913           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
914         }
915       }
916     }
917     /* do your job */
918     if(job_tree.pdata==NULL) {
919       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
920     }
921     else {
922       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
923     }
924     if(iterr!=0) {
925       ithreaderr = 1;
926     }
927     if(PetscThreadGo) {
928       /* reset job, get ready for more */
929       if(PeeOn==PETSC_FALSE) {
930         /* check your subordinates, waiting for them to be ready
931          how do you know for a fact that a given subordinate has actually started? */
932 	for(i=1;i<=Mary;i++) {
933 	  SubWorker = Mary*ThreadId+i;
934           if(SubWorker<PetscMaxThreads) {
935             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
936             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
937               /* upon entry, automically releases the lock and blocks
938                upon return, has the lock */
939               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
940             }
941             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
942           }
943 	}
944         /* your subordinates are now ready */
945       }
946       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
947       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
948       if(ThreadId==0) {
949 	job_tree.eJobStat = JobCompleted; /* oot thread: last thread to complete, guaranteed! */
950         /* root thread signals 'main' */
951         ierr = pthread_cond_signal(&main_cond);
952       }
953       else {
954         /* signal your boss before you go to sleep */
955         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
956       }
957     }
958   }
959   return NULL;
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "PetscThreadInitialize_Tree"
964 void* PetscThreadInitialize_Tree(PetscInt N) {
965   PetscInt i,ierr;
966   int status;
967 
968   if(PetscUseThreadPool) {
969 #if defined(PETSC_HAVE_MEMALIGN)
970     size_t Val1 = (size_t)CACHE_LINE_SIZE;
971     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
972     arrmutex = (char*)memalign(Val1,Val2);
973     arrcond1 = (char*)memalign(Val1,Val2);
974     arrcond2 = (char*)memalign(Val1,Val2);
975     arrstart = (char*)memalign(Val1,Val2);
976     arrready = (char*)memalign(Val1,Val2);
977 #else
978     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
979     arrmutex = (char*)malloc(Val2);
980     arrcond1 = (char*)malloc(Val2);
981     arrcond2 = (char*)malloc(Val2);
982     arrstart = (char*)malloc(Val2);
983     arrready = (char*)malloc(Val2);
984 #endif
985     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
986     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
987     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
988     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
989     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
990     /* initialize job structure */
991     for(i=0; i<PetscMaxThreads; i++) {
992       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
993       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
994       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
995       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
996       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
997     }
998     for(i=0; i<PetscMaxThreads; i++) {
999       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
1000       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
1001       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
1002       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
1003       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
1004     }
1005     job_tree.pfunc = NULL;
1006     job_tree.pdata = (void**)malloc(N*sizeof(void*));
1007     job_tree.startJob = PETSC_FALSE;
1008     job_tree.eJobStat = JobInitiated;
1009     pVal = (int*)malloc(N*sizeof(int));
1010     /* allocate memory in the heap for the thread structure */
1011     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1012     /* create threads */
1013     for(i=0; i<N; i++) {
1014       pVal[i] = i;
1015       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1016       /* should check status */
1017     }
1018   }
1019   return NULL;
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "PetscThreadFinalize_Tree"
1024 PetscErrorCode PetscThreadFinalize_Tree() {
1025   int i,ierr;
1026   void* jstatus;
1027 
1028   PetscFunctionBegin;
1029 
1030   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1031   /* join the threads */
1032   for(i=0; i<PetscMaxThreads; i++) {
1033     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1034     /* do error checking*/
1035   }
1036   free(PetscThreadPoint);
1037   free(arrmutex);
1038   free(arrcond1);
1039   free(arrcond2);
1040   free(arrstart);
1041   free(arrready);
1042   free(job_tree.pdata);
1043   free(pVal);
1044 
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MainWait_Tree"
1050 void MainWait_Tree() {
1051   int ierr;
1052   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1053   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1054     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1055   }
1056   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1057 }
1058 
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MainJob_Tree"
1061 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1062   int i,ierr;
1063   PetscErrorCode ijoberr = 0;
1064 
1065   MainWait();
1066   job_tree.pfunc = pFunc;
1067   job_tree.pdata = data;
1068   job_tree.startJob = PETSC_TRUE;
1069   for(i=0; i<PetscMaxThreads; i++) {
1070     *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1071   }
1072   job_tree.eJobStat = JobInitiated;
1073   ierr = pthread_cond_signal(job_tree.cond2array[0]);
1074   if(pFunc!=FuncFinish) {
1075     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1076   }
1077 
1078   if(ithreaderr) {
1079     ijoberr = ithreaderr;
1080   }
1081   return ijoberr;
1082 }
1083 /****  ****/
1084 
1085 /**** 'Main' Thread Pool Functions ****/
1086 void* PetscThreadFunc_Main(void* arg) {
1087   PetscErrorCode iterr;
1088   int ierr;
1089   int* pId = (int*)arg;
1090   int ThreadId = *pId;
1091   //printf("Thread %d In Main Thread Function\n",ThreadId);
1092 #if defined(PETSC_HAVE_CPU_SET_T)
1093     iterr = PetscPthreadSetAffinity(ThreadCoreAffinity[ThreadId]);CHKERRQ(iterr);
1094 #endif
1095 
1096   ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1097   /* update your ready status */
1098   *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1099   /* tell the BOSS that you're ready to work before you go to sleep */
1100   ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1101 
1102   /* the while loop needs to have an exit
1103      the 'main' thread can terminate all the threads by performing a broadcast
1104      and calling FuncFinish */
1105   while(PetscThreadGo) {
1106     /* need to check the condition to ensure we don't have to wait
1107        waiting when you don't have to causes problems
1108      also need to check the condition to ensure proper handling of spurious wakeups */
1109     while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
1110       /* upon entry, atomically releases the lock and blocks
1111        upon return, has the lock */
1112         ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
1113 	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
1114     }
1115     ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
1116     if(job_main.pdata==NULL) {
1117       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
1118     }
1119     else {
1120       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
1121     }
1122     if(iterr!=0) {
1123       ithreaderr = 1;
1124     }
1125     if(PetscThreadGo) {
1126       /* reset job, get ready for more */
1127       ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1128       *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1129       /* tell the BOSS that you're ready to work before you go to sleep */
1130       ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1131     }
1132   }
1133   return NULL;
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "PetscThreadInitialize_Main"
1138 void* PetscThreadInitialize_Main(PetscInt N) {
1139   PetscInt i,ierr;
1140   int status;
1141 
1142   if(PetscUseThreadPool) {
1143 #if defined(PETSC_HAVE_MEMALIGN)
1144     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1145     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1146     arrmutex = (char*)memalign(Val1,Val2);
1147     arrcond1 = (char*)memalign(Val1,Val2);
1148     arrcond2 = (char*)memalign(Val1,Val2);
1149     arrstart = (char*)memalign(Val1,Val2);
1150     arrready = (char*)memalign(Val1,Val2);
1151 #else
1152     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1153     arrmutex = (char*)malloc(Val2);
1154     arrcond1 = (char*)malloc(Val2);
1155     arrcond2 = (char*)malloc(Val2);
1156     arrstart = (char*)malloc(Val2);
1157     arrready = (char*)malloc(Val2);
1158 #endif
1159 
1160     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1161     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1162     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1163     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1164     /* initialize job structure */
1165     for(i=0; i<PetscMaxThreads; i++) {
1166       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1167       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1168       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1169       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1170     }
1171     for(i=0; i<PetscMaxThreads; i++) {
1172       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1173       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1174       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1175       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1176     }
1177     job_main.pfunc = NULL;
1178     job_main.pdata = (void**)malloc(N*sizeof(void*));
1179     pVal = (int*)malloc(N*sizeof(int));
1180     /* allocate memory in the heap for the thread structure */
1181     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1182     /* create threads */
1183     for(i=0; i<N; i++) {
1184       pVal[i] = i;
1185       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1186       /* error check */
1187     }
1188   }
1189   else {
1190   }
1191   return NULL;
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "PetscThreadFinalize_Main"
1196 PetscErrorCode PetscThreadFinalize_Main() {
1197   int i,ierr;
1198   void* jstatus;
1199 
1200   PetscFunctionBegin;
1201 
1202   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1203   /* join the threads */
1204   for(i=0; i<PetscMaxThreads; i++) {
1205     ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1206   }
1207   free(PetscThreadPoint);
1208   free(arrmutex);
1209   free(arrcond1);
1210   free(arrcond2);
1211   free(arrstart);
1212   free(arrready);
1213   free(job_main.pdata);
1214   free(pVal);
1215 
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MainWait_Main"
1221 void MainWait_Main() {
1222   int i,ierr;
1223   for(i=0; i<PetscMaxThreads; i++) {
1224     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1225     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1226       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1227     }
1228     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1229   }
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MainJob_Main"
1234 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1235   int i,ierr;
1236   PetscErrorCode ijoberr = 0;
1237 
1238   MainWait(); /* you know everyone is waiting to be signalled! */
1239   job_main.pfunc = pFunc;
1240   job_main.pdata = data;
1241   for(i=0; i<PetscMaxThreads; i++) {
1242     *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1243   }
1244   /* tell the threads to go to work */
1245   for(i=0; i<PetscMaxThreads; i++) {
1246     ierr = pthread_cond_signal(job_main.cond2array[i]);
1247   }
1248   if(pFunc!=FuncFinish) {
1249     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1250   }
1251 
1252   if(ithreaderr) {
1253     ijoberr = ithreaderr;
1254   }
1255   return ijoberr;
1256 }
1257 /****  ****/
1258 
1259 /**** Chain Thread Functions ****/
1260 void* PetscThreadFunc_Chain(void* arg) {
1261   PetscErrorCode iterr;
1262   int ierr;
1263   int* pId = (int*)arg;
1264   int ThreadId = *pId;
1265   int SubWorker = ThreadId + 1;
1266   PetscBool PeeOn;
1267   //printf("Thread %d In Chain Thread Function\n",ThreadId);
1268 #if defined(PETSC_HAVE_CPU_SET_T)
1269   iterr = PetscPthreadSetAffinity(ThreadCoreAffinity[ThreadId]);CHKERRQ(iterr);
1270 #endif
1271   if(ThreadId==(PetscMaxThreads-1)) {
1272     PeeOn = PETSC_TRUE;
1273   }
1274   else {
1275     PeeOn = PETSC_FALSE;
1276   }
1277   if(PeeOn==PETSC_FALSE) {
1278     /* check your subordinate, wait for him to be ready */
1279     ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1280     while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
1281       /* upon entry, automically releases the lock and blocks
1282        upon return, has the lock */
1283       ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1284     }
1285     ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1286     /* your subordinate is now ready*/
1287   }
1288   ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1289   /* update your ready status */
1290   *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1291   if(ThreadId==0) {
1292     job_chain.eJobStat = JobCompleted;
1293     /* signal main */
1294     ierr = pthread_cond_signal(&main_cond);
1295   }
1296   else {
1297     /* tell your boss that you're ready to work */
1298     ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1299   }
1300   /*  the while loop needs to have an exit
1301      the 'main' thread can terminate all the threads by performing a broadcast
1302    and calling FuncFinish */
1303   while(PetscThreadGo) {
1304     /* need to check the condition to ensure we don't have to wait
1305        waiting when you don't have to causes problems
1306      also need to check the condition to ensure proper handling of spurious wakeups */
1307     while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
1308       /*upon entry, automically releases the lock and blocks
1309        upon return, has the lock */
1310         ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
1311 	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
1312 	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
1313     }
1314     if(ThreadId==0) {
1315       job_chain.startJob = PETSC_FALSE;
1316       job_chain.eJobStat = ThreadsWorking;
1317     }
1318     ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
1319     if(PeeOn==PETSC_FALSE) {
1320       /* tell your subworker it's time to get to work */
1321       ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
1322     }
1323     /* do your job */
1324     if(job_chain.pdata==NULL) {
1325       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
1326     }
1327     else {
1328       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
1329     }
1330     if(iterr!=0) {
1331       ithreaderr = 1;
1332     }
1333     if(PetscThreadGo) {
1334       /* reset job, get ready for more */
1335       if(PeeOn==PETSC_FALSE) {
1336         /* check your subordinate, wait for him to be ready
1337          how do you know for a fact that your subordinate has actually started? */
1338         ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1339         while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
1340           /* upon entry, automically releases the lock and blocks
1341            upon return, has the lock */
1342           ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1343         }
1344         ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1345         /* your subordinate is now ready */
1346       }
1347       ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1348       *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1349       if(ThreadId==0) {
1350 	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
1351         /* root thread (foreman) signals 'main' */
1352         ierr = pthread_cond_signal(&main_cond);
1353       }
1354       else {
1355         /* signal your boss before you go to sleep */
1356         ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1357       }
1358     }
1359   }
1360   return NULL;
1361 }
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "PetscThreadInitialize_Chain"
1365 void* PetscThreadInitialize_Chain(PetscInt N) {
1366   PetscInt i,ierr;
1367   int status;
1368 
1369   if(PetscUseThreadPool) {
1370 #if defined(PETSC_HAVE_MEMALIGN)
1371     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1372     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1373     arrmutex = (char*)memalign(Val1,Val2);
1374     arrcond1 = (char*)memalign(Val1,Val2);
1375     arrcond2 = (char*)memalign(Val1,Val2);
1376     arrstart = (char*)memalign(Val1,Val2);
1377     arrready = (char*)memalign(Val1,Val2);
1378 #else
1379     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1380     arrmutex = (char*)malloc(Val2);
1381     arrcond1 = (char*)malloc(Val2);
1382     arrcond2 = (char*)malloc(Val2);
1383     arrstart = (char*)malloc(Val2);
1384     arrready = (char*)malloc(Val2);
1385 #endif
1386 
1387     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1388     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1389     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1390     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1391     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1392     /* initialize job structure */
1393     for(i=0; i<PetscMaxThreads; i++) {
1394       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1395       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1396       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1397       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1398       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1399     }
1400     for(i=0; i<PetscMaxThreads; i++) {
1401       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1402       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1403       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1404       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1405       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1406     }
1407     job_chain.pfunc = NULL;
1408     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1409     job_chain.startJob = PETSC_FALSE;
1410     job_chain.eJobStat = JobInitiated;
1411     pVal = (int*)malloc(N*sizeof(int));
1412     /* allocate memory in the heap for the thread structure */
1413     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1414     /* create threads */
1415     for(i=0; i<N; i++) {
1416       pVal[i] = i;
1417       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1418       /* should check error */
1419     }
1420   }
1421   else {
1422   }
1423   return NULL;
1424 }
1425 
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "PetscThreadFinalize_Chain"
1429 PetscErrorCode PetscThreadFinalize_Chain() {
1430   int i,ierr;
1431   void* jstatus;
1432 
1433   PetscFunctionBegin;
1434 
1435   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1436   /* join the threads */
1437   for(i=0; i<PetscMaxThreads; i++) {
1438     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1439     /* should check error */
1440   }
1441   free(PetscThreadPoint);
1442   free(arrmutex);
1443   free(arrcond1);
1444   free(arrcond2);
1445   free(arrstart);
1446   free(arrready);
1447   free(job_chain.pdata);
1448   free(pVal);
1449 
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MainWait_Chain"
1455 void MainWait_Chain() {
1456   int ierr;
1457   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1458   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1459     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1460   }
1461   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MainJob_Chain"
1466 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1467   int i,ierr;
1468   PetscErrorCode ijoberr = 0;
1469 
1470   MainWait();
1471   job_chain.pfunc = pFunc;
1472   job_chain.pdata = data;
1473   job_chain.startJob = PETSC_TRUE;
1474   for(i=0; i<PetscMaxThreads; i++) {
1475     *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1476   }
1477   job_chain.eJobStat = JobInitiated;
1478   ierr = pthread_cond_signal(job_chain.cond2array[0]);
1479   if(pFunc!=FuncFinish) {
1480     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1481   }
1482 
1483   if(ithreaderr) {
1484     ijoberr = ithreaderr;
1485   }
1486   return ijoberr;
1487 }
1488 /****  ****/
1489 
1490 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1491 /**** True Thread Functions ****/
1492 void* PetscThreadFunc_True(void* arg) {
1493   int ierr,iVal;
1494   int* pId = (int*)arg;
1495   int ThreadId = *pId;
1496   PetscErrorCode iterr;
1497   //printf("Thread %d In True Pool Thread Function\n",ThreadId);
1498 #if defined(PETSC_HAVE_CPU_SET_T)
1499   iterr = PetscPthreadSetAffinity(ThreadCoreAffinity[ThreadId]);CHKERRQ(iterr);
1500 #endif
1501   ierr = pthread_mutex_lock(&job_true.mutex);
1502   job_true.iNumReadyThreads++;
1503   if(job_true.iNumReadyThreads==PetscMaxThreads) {
1504     ierr = pthread_cond_signal(&main_cond);
1505   }
1506   /*the while loop needs to have an exit
1507     the 'main' thread can terminate all the threads by performing a broadcast
1508    and calling FuncFinish */
1509   while(PetscThreadGo) {
1510     /*need to check the condition to ensure we don't have to wait
1511       waiting when you don't have to causes problems
1512      also need to wait if another thread sneaks in and messes with the predicate */
1513     while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
1514       /* upon entry, automically releases the lock and blocks
1515        upon return, has the lock */
1516       //printf("Thread %d Going to Sleep!\n",ThreadId);
1517       ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
1518     }
1519     job_true.startJob = PETSC_FALSE;
1520     job_true.iNumJobThreads--;
1521     job_true.iNumReadyThreads--;
1522     iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
1523     pthread_mutex_unlock(&job_true.mutex);
1524     if(job_true.pdata==NULL) {
1525       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
1526     }
1527     else {
1528       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
1529     }
1530     if(iterr!=0) {
1531       ithreaderr = 1;
1532     }
1533     //printf("Thread %d Finished Job\n",ThreadId);
1534     /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
1535       what happens if a thread finishes before they all start? BAD!
1536      what happens if a thread finishes before any else start? BAD! */
1537     pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
1538     /* reset job */
1539     if(PetscThreadGo) {
1540       pthread_mutex_lock(&job_true.mutex);
1541       job_true.iNumReadyThreads++;
1542       if(job_true.iNumReadyThreads==PetscMaxThreads) {
1543 	/* signal the 'main' thread that the job is done! (only done once) */
1544 	ierr = pthread_cond_signal(&main_cond);
1545       }
1546     }
1547   }
1548   return NULL;
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "PetscThreadInitialize_True"
1553 void* PetscThreadInitialize_True(PetscInt N) {
1554   PetscInt i;
1555   int status;
1556 
1557   pVal = (int*)malloc(N*sizeof(int));
1558   /* allocate memory in the heap for the thread structure */
1559   PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1560   BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1561   job_true.pdata = (void**)malloc(N*sizeof(void*));
1562   for(i=0; i<N; i++) {
1563     pVal[i] = i;
1564     status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1565     /* error check to ensure proper thread creation */
1566     status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1567     /* should check error */
1568   }
1569   //printf("Finished True Thread Pool Initialization\n");
1570   return NULL;
1571 }
1572 
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "PetscThreadFinalize_True"
1576 PetscErrorCode PetscThreadFinalize_True() {
1577   int i,ierr;
1578   void* jstatus;
1579 
1580   PetscFunctionBegin;
1581 
1582   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1583   /* join the threads */
1584   for(i=0; i<PetscMaxThreads; i++) {
1585     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1586   }
1587   free(BarrPoint);
1588   free(PetscThreadPoint);
1589 
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MainWait_True"
1595 void MainWait_True() {
1596   int ierr;
1597   ierr = pthread_mutex_lock(&job_true.mutex);
1598   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1599     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1600   }
1601   ierr = pthread_mutex_unlock(&job_true.mutex);
1602 }
1603 
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MainJob_True"
1606 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1607   int ierr;
1608   PetscErrorCode ijoberr = 0;
1609 
1610   MainWait();
1611   job_true.pfunc = pFunc;
1612   job_true.pdata = data;
1613   job_true.pbarr = &BarrPoint[n];
1614   job_true.iNumJobThreads = n;
1615   job_true.startJob = PETSC_TRUE;
1616   ierr = pthread_cond_broadcast(&job_true.cond);
1617   if(pFunc!=FuncFinish) {
1618     MainWait(); /* why wait after? guarantees that job gets done */
1619   }
1620 
1621   if(ithreaderr) {
1622     ijoberr = ithreaderr;
1623   }
1624   return ijoberr;
1625 }
1626 #endif
1627 
1628 /**** NO THREAD POOL FUNCTION ****/
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MainJob_Spawn"
1631 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1632   PetscErrorCode ijoberr = 0;
1633 
1634   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1635   PetscThreadPoint = apThread; /* point to same place */
1636   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1637   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1638   free(apThread);
1639 
1640   return ijoberr;
1641 }
1642 /****  ****/
1643 #endif
1644 
1645 void* FuncFinish(void* arg) {
1646   PetscThreadGo = PETSC_FALSE;
1647   return(0);
1648 }
1649