xref: /petsc/src/sys/objects/init.c (revision df17f0d5a75f08e11c817a1972ff7c67227bc74f)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
  This file uses the plain system malloc() and free() because it cannot
  know which malloc implementation has been selected until it has already processed the input options.
7 */
8 
9 #define _GNU_SOURCE
10 #include <sched.h>
11 #include <petscsys.h>        /*I  "petscsys.h"   I*/
12 #if defined(PETSC_USE_PTHREAD)
13 #include <pthread.h>
14 #endif
15 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
16 #include <sys/sysinfo.h>
17 #endif
18 #include <unistd.h>
19 #if defined(PETSC_HAVE_STDLIB_H)
20 #include <stdlib.h>
21 #endif
22 #if defined(PETSC_HAVE_MALLOC_H)
23 #include <malloc.h>
24 #endif
25 #if defined(PETSC_HAVE_VALGRIND)
26 #include <valgrind/valgrind.h>
27 #endif
28 
/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool    PetscBeganMPI         = PETSC_FALSE;
PetscBool    PetscInitializeCalled = PETSC_FALSE;  /* PETSC_TRUE once PetscInitialize() has completed (set elsewhere) */
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;  /* PETSC_TRUE once PetscFinalize() has completed (set elsewhere) */
PetscBool    PetscUseThreadPool    = PETSC_FALSE;  /* set when -use_thread_pool is given (pthread-classes builds) */
PetscBool    PetscThreadGo         = PETSC_TRUE;   /* presumably cleared to tell pool threads to exit -- confirm in thread-pool code */
PetscMPIInt  PetscGlobalRank = -1;                 /* -1 until initialized; assigned outside this chunk */
PetscMPIInt  PetscGlobalSize = -1;                 /* -1 until initialized; assigned outside this chunk */
41 
#if defined(PETSC_USE_PTHREAD_CLASSES)
PetscMPIInt  PetscMaxThreads = 2;   /* number of worker threads; overridden by -thread_max */
pthread_t*   PetscThreadPoint;      /* thread ids of the pool workers (allocated elsewhere) */
/* NOTE(review): this unconditionally defines the macro the very next #if tests,
   so the barrier branch is always compiled -- confirm this is intentional */
#define PETSC_HAVE_PTHREAD_BARRIER
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode ithreaderr = 0;  /* presumably an error code propagated from a worker thread -- confirm at use sites */
int*         pVal;              /* per-thread integer values; allocation site not in view */

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
int* ThreadCoreAffinity;        /* core each thread is pinned to; filled from the -thread<i> options */

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */

/* Shared job descriptor for the 'tree' thread pool. */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);          /* job function each thread runs */
  void** pdata;                   /* per-thread argument for pfunc */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* Shared job descriptor for the 'main' thread pool. */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);          /* job function each thread runs */
  void** pdata;                   /* per-thread argument for pfunc */
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* Shared job descriptor for the 'chain' thread pool (same layout as sjob_tree). */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);          /* job function each thread runs */
  void** pdata;                   /* per-thread argument for pfunc */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/* Shared job descriptor for the 'true' (barrier-based) thread pool. */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);          /* job function each thread runs */
  void** pdata;                   /* per-thread argument for pfunc */
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
char* arrmutex; /* used by 'chain','main','tree' thread pools */
char* arrcond1; /* used by 'chain','main','tree' thread pools */
char* arrcond2; /* used by 'chain','main','tree' thread pools */
char* arrstart; /* used by 'chain','main','tree' thread pools */
char* arrready; /* used by 'chain','main','tree' thread pools */
110 
/* Function Pointers */
/* Dispatch table: one implementation set ('tree', 'main', 'chain' or 'true')
   is selected at startup from the -use_thread_pool option (see
   PetscOptionsCheckInitial_Private below); NULL until then. */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** Used when no pool is active: spawn/join threads per job ****/
PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);

void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif
148 
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
/* Fallback datatypes when the MPI implementation lacks the C complex types;
   presumably committed during initialization -- not visible in this chunk. */
MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
MPI_Datatype   MPI_C_COMPLEX;
#endif
PetscScalar    PETSC_i;           /* the imaginary unit; value assigned elsewhere */
#else
PetscScalar    PETSC_i = 0.0;     /* real builds: PETSC_i is just zero */
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;
#endif
/* Pair datatypes (e.g. for MPI reductions on value/index pairs); built elsewhere. */
MPI_Datatype   MPIU_2SCALAR = 0;
MPI_Datatype   MPIU_2INT = 0;
166 
/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
/* Scratch globals used by the bit-array macros in petscbt.h.
   NOTE(review): names starting with underscore + uppercase are reserved
   identifiers in C -- consider renaming upstream. */
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;
174 
/*
       Function that is called to display all error messages
*/
/* Overridable output hooks; defaults print to standard streams
   (or route through MATLAB when built with the MATLAB engine). */
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
#if defined(PETSC_HAVE_MATLAB_ENGINE)
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
  This is needed to turn on/off cusp synchronization */
PetscBool   synchronizeCUSP = PETSC_FALSE;

/* ------------------------------------------------------------------------------*/
/*
   Optional file where all PETSc output from various prints is saved
   (opened by PetscOpenHistoryFile(), closed by PetscCloseHistoryFile())
*/
FILE *petsc_history = PETSC_NULL;
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "PetscOpenHistoryFile"
197 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
198 {
199   PetscErrorCode ierr;
200   PetscMPIInt    rank,size;
201   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
202   char           version[256];
203 
204   PetscFunctionBegin;
205   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
206   if (!rank) {
207     char        arch[10];
208     int         err;
209     PetscViewer viewer;
210 
211     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
212     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
213     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
214     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
215     if (filename) {
216       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
217     } else {
218       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
219       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
220       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
221     }
222 
223     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
224     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
225     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
226     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
227     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
228     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
229     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
230     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
231     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
232     err = fflush(*fd);
233     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "PetscCloseHistoryFile"
240 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
241 {
242   PetscErrorCode ierr;
243   PetscMPIInt    rank;
244   char           date[64];
245   int            err;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
249   if (!rank) {
250     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
251     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
252     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
253     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
254     err = fflush(*fd);
255     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
256     err = fclose(*fd);
257     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
258   }
259   PetscFunctionReturn(0);
260 }
261 
/* ------------------------------------------------------------------------------*/

/*
   This is ugly and probably belongs somewhere else, but I want to
  be able to put a true MPI abort error handler with command line args.

    This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits thus providing no useful information
  in the debugger hence we call abort() instead of MPI_Abort().
*/

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/* MPI error-handler callback: print the MPI error code then abort() so the
   stack frames remain visible in a debugger (see comment above). */
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();  /* never returns, so no PetscFunctionReturn here */
}
281 
#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/* MPI error-handler callback: print the MPI error code and try to attach the
   debugger to this process; if that fails, fall back to MPI_Abort(). */
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}
295 
#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode  PetscEnd(void)
{
  PetscFunctionBegin;
  PetscFinalize();
  exit(0);
  return 0;  /* unreachable after exit(); kept only to satisfy the compiler */
}
320 
PetscBool    PetscOptionsPublish = PETSC_FALSE;  /* set by -options_gui (see PetscOptionsCheckInitial_Private) */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
extern PetscBool  petscsetmallocvisited;
static char       emacsmachinename[256];         /* machine name passed to PetscEmacsClientErrorHandler via -on_error_emacs */

/* Hooks so a higher-level package can print its own version/help text first;
   installed by PetscSetHelpVersionFunctions() below. */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "PetscSetHelpVersionFunctions"
331 /*@C
332    PetscSetHelpVersionFunctions - Sets functions that print help and version information
333    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
334    This routine enables a "higher-level" package that uses PETSc to print its messages first.
335 
336    Input Parameter:
337 +  help - the help function (may be PETSC_NULL)
338 -  version - the version function (may be PETSC_NULL)
339 
340    Level: developer
341 
342    Concepts: package help message
343 
344 @*/
345 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
346 {
347   PetscFunctionBegin;
348   PetscExternalHelpFunction    = help;
349   PetscExternalVersionFunction = version;
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
355 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
356 {
357   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
358   MPI_Comm       comm = PETSC_COMM_WORLD;
359   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
360   PetscErrorCode ierr;
361   PetscReal      si;
362   int            i;
363   PetscMPIInt    rank;
364   char           version[256];
365 
366   PetscFunctionBegin;
367   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
368 
369   /*
370       Setup the memory management; support for tracing malloc() usage
371   */
372   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
373 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
374   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
375   if ((!flg2 || flg1) && !petscsetmallocvisited) {
376 #if defined(PETSC_HAVE_VALGRIND)
377     if (flg2 || !(RUNNING_ON_VALGRIND)) {
378       /* turn off default -malloc if valgrind is being used */
379 #endif
380       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
381 #if defined(PETSC_HAVE_VALGRIND)
382     }
383 #endif
384   }
385 #else
386   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
387   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
388   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
389 #endif
390   if (flg3) {
391     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
392   }
393   flg1 = PETSC_FALSE;
394   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
395   if (flg1) {
396     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
397     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
398   }
399 
400   flg1 = PETSC_FALSE;
401   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
402   if (!flg1) {
403     flg1 = PETSC_FALSE;
404     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
405   }
406   if (flg1) {
407     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
408   }
409 
410   /*
411       Set the display variable for graphics
412   */
413   ierr = PetscSetDisplay();CHKERRQ(ierr);
414 
415 
416   /*
417       Print the PETSc version information
418   */
419   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
420   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
421   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
422   if (flg1 || flg2 || flg3){
423 
424     /*
425        Print "higher-level" package version message
426     */
427     if (PetscExternalVersionFunction) {
428       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
429     }
430 
431     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
432     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
433 ------------------------------\n");CHKERRQ(ierr);
434     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
435     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
436     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
437     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
441 ------------------------------\n");CHKERRQ(ierr);
442   }
443 
444   /*
445        Print "higher-level" package help message
446   */
447   if (flg3){
448     if (PetscExternalHelpFunction) {
449       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
450     }
451   }
452 
453   /*
454       Setup the error handling
455   */
456   flg1 = PETSC_FALSE;
457   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
458   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
459   flg1 = PETSC_FALSE;
460   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
461   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
462   flg1 = PETSC_FALSE;
463   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
464   if (flg1) {
465     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
466   }
467   flg1 = PETSC_FALSE;
468   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
469   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
470   flg1 = PETSC_FALSE;
471   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
472   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
473 
474   /*
475       Setup debugger information
476   */
477   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
478   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
479   if (flg1) {
480     MPI_Errhandler err_handler;
481 
482     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
483     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
484     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
485     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
486   }
487   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
488   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
489   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
490   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
491   if (flg1 || flg2) {
492     PetscMPIInt    size;
493     PetscInt       lsize,*nodes;
494     MPI_Errhandler err_handler;
495     /*
496        we have to make sure that all processors have opened
497        connections to all other processors, otherwise once the
498        debugger has stated it is likely to receive a SIGUSR1
499        and kill the program.
500     */
501     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
502     if (size > 2) {
503       PetscMPIInt dummy = 0;
504       MPI_Status  status;
505       for (i=0; i<size; i++) {
506         if (rank != i) {
507           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
508         }
509       }
510       for (i=0; i<size; i++) {
511         if (rank != i) {
512           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
513         }
514       }
515     }
516     /* check if this processor node should be in debugger */
517     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
518     lsize = size;
519     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
520     if (flag) {
521       for (i=0; i<lsize; i++) {
522         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
523       }
524     }
525     if (!flag) {
526       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
527       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
528       if (flg1) {
529         ierr = PetscAttachDebugger();CHKERRQ(ierr);
530       } else {
531         ierr = PetscStopForDebugger();CHKERRQ(ierr);
532       }
533       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
534       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
535     }
536     ierr = PetscFree(nodes);CHKERRQ(ierr);
537   }
538 
539   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
540   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
541 
542 #if defined(PETSC_USE_SOCKET_VIEWER)
543   /*
544     Activates new sockets for zope if needed
545   */
546   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
547   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
548   if (flgz){
549     int  sockfd;
550     char hostname[256];
551     char username[256];
552     int  remoteport = 9999;
553 
554     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
555     if (!hostname[0]){
556       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
557     }
558     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
559     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
560     PETSC_ZOPEFD = fdopen(sockfd, "w");
561     if (flgzout){
562       PETSC_STDOUT = PETSC_ZOPEFD;
563       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
564       fprintf(PETSC_STDOUT, "<<<start>>>");
565     } else {
566       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
567       fprintf(PETSC_ZOPEFD, "<<<start>>>");
568     }
569   }
570 #endif
571 #if defined(PETSC_USE_SERVER)
572   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
573   if (flgz){
574     PetscInt port = PETSC_DECIDE;
575     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
576     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
577   }
578 #endif
579 
580   /*
581         Setup profiling and logging
582   */
583 #if defined (PETSC_USE_INFO)
584   {
585     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
586     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
587     if (flg1 && logname[0]) {
588       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
589     } else if (flg1) {
590       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
591     }
592   }
593 #endif
594 #if defined(PETSC_USE_LOG)
595   mname[0] = 0;
596   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
597   if (flg1) {
598     if (mname[0]) {
599       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
600     } else {
601       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
602     }
603   }
604 #if defined(PETSC_HAVE_MPE)
605   flg1 = PETSC_FALSE;
606   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
607   if (flg1) PetscLogMPEBegin();
608 #endif
609   flg1 = PETSC_FALSE;
610   flg2 = PETSC_FALSE;
611   flg3 = PETSC_FALSE;
612   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
613   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
614   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
615   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
616   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
617   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
618 
619   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
620   if (flg1) {
621     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
622     FILE *file;
623     if (mname[0]) {
624       sprintf(name,"%s.%d",mname,rank);
625       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
626       file = fopen(fname,"w");
627       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
628     } else {
629       file = PETSC_STDOUT;
630     }
631     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
632   }
633 #endif
634 
635   /*
636       Setup building of stack frames for all function calls
637   */
638 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
639   ierr = PetscStackCreate();CHKERRQ(ierr);
640 #endif
641 
642   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
643 
644 #if defined(PETSC_USE_PTHREAD_CLASSES)
645   /*
646       Determine whether user specified maximum number of threads
647    */
648   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
649 
650   /*
651       Determine whether to use thread pool
652    */
653   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
654   if (flg1) {
655     PetscUseThreadPool = PETSC_TRUE;
656     PetscInt N_CORES = get_nprocs();
657     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
658     char tstr[9];
659     char tbuf[2];
660     strcpy(tstr,"-thread");
661     for(i=0;i<PetscMaxThreads;i++) {
662       ThreadCoreAffinity[i] = i;
663       sprintf(tbuf,"%d",i);
664       strcat(tstr,tbuf);
665       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
666       if(flg1) {
667         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
668         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
669       }
670       tstr[7] = '\0';
671     }
672     /* get the thread pool type */
673     PetscInt ipool = 0;
674     const char *choices[4] = {"true","tree","main","chain"};
675 
676     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
677     switch(ipool) {
678     case 1:
679       PetscThreadFunc       = &PetscThreadFunc_Tree;
680       PetscThreadInitialize = &PetscThreadInitialize_Tree;
681       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
682       MainWait              = &MainWait_Tree;
683       MainJob               = &MainJob_Tree;
684       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
685       break;
686     case 2:
687       PetscThreadFunc       = &PetscThreadFunc_Main;
688       PetscThreadInitialize = &PetscThreadInitialize_Main;
689       PetscThreadFinalize   = &PetscThreadFinalize_Main;
690       MainWait              = &MainWait_Main;
691       MainJob               = &MainJob_Main;
692       PetscInfo(PETSC_NULL,"Using main thread pool\n");
693       break;
694 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
695     case 3:
696 #else
697     default:
698 #endif
699       PetscThreadFunc       = &PetscThreadFunc_Chain;
700       PetscThreadInitialize = &PetscThreadInitialize_Chain;
701       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
702       MainWait              = &MainWait_Chain;
703       MainJob               = &MainJob_Chain;
704       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
705       break;
706 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
707     default:
708       PetscThreadFunc       = &PetscThreadFunc_True;
709       PetscThreadInitialize = &PetscThreadInitialize_True;
710       PetscThreadFinalize   = &PetscThreadFinalize_True;
711       MainWait              = &MainWait_True;
712       MainJob               = &MainJob_True;
713       PetscInfo(PETSC_NULL,"Using true thread pool\n");
714       break;
715 #endif
716     }
717     PetscThreadInitialize(PetscMaxThreads);
718   } else {
719     //need to define these in the case on 'no threads' or 'thread create/destroy'
720     //could take any of the above versions
721     MainJob               = &MainJob_Spawn;
722   }
723 #endif
724   /*
725        Print basic help message
726   */
727   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
728   if (flg1) {
729     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
730     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
731     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
732     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
733     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
734     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
735     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
736     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
737     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
738     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
739     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
741     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
742     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
763 #if defined(PETSC_USE_LOG)
764     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
767 #if defined(PETSC_HAVE_MPE)
768     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
769 #endif
770     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
771 #endif
772     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
776   }
777 
778   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
779   if (flg1) {
780     ierr = PetscSleep(si);CHKERRQ(ierr);
781   }
782 
783   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
784   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
785   if (f) {
786     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
787   }
788 
789 #if defined(PETSC_HAVE_CUSP)
790   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
791   if (flg3) flg1 = PETSC_TRUE;
792   else flg1 = PETSC_FALSE;
793   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
794   if (flg1) synchronizeCUSP = PETSC_TRUE;
795 #endif
796 
797   PetscFunctionReturn(0);
798 }
799 
800 #if defined(PETSC_USE_PTHREAD_CLASSES)
801 
802 /**** 'Tree' Thread Pool Functions ****/
/*
   Worker routine for the 'tree' thread pool.  Threads form a binary tree
   (children of thread t are 2*t+1 and 2*t+2).  A thread first waits for its
   subordinates to report ready, then reports ready itself: thread 0 signals
   the 'main' thread via main_cond, every other thread signals its boss via
   its cond1.  It then sleeps on cond2 until told to run job_tree.pfunc on
   its entry of job_tree.pdata.  arg points at this thread's index in pVal[];
   always returns NULL.
*/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;   /* Mary = arity of the tree */
  PetscBool PeeOn;                            /* PETSC_TRUE when this node has no subordinates (leaf) */
  cpu_set_t mset;
  //printf("Thread %d In Tree Thread Function\n",ThreadId);
  /* pin this thread to the core selected at initialization */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* leaf test: first child index would exceed the last thread id */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
           upon return, has the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  /* NOTE: mutexarray[ThreadId] stays held from here through the cond_wait loop below */
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit;
  the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish (which clears PetscThreadGo) */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait:
      waiting when you don't have to causes problems;
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, has the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
	SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job; a NULL pdata means the same argument (NULL) for everyone */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* sticky global error flag, read by MainJob_* */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinates, waiting for them to be ready;
         arrThreadStarted guards against a subordinate that never actually started */
	for(i=1;i<=Mary;i++) {
	  SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks;
               upon return, has the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
	}
        /* your subordinates are now ready */
      }
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "PetscThreadInitialize_Tree"
925 void* PetscThreadInitialize_Tree(PetscInt N) {
926   PetscInt i,ierr;
927   int status;
928 
929   if(PetscUseThreadPool) {
930     size_t Val1 = (size_t)CACHE_LINE_SIZE;
931     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
932     arrmutex = (char*)memalign(Val1,Val2);
933     arrcond1 = (char*)memalign(Val1,Val2);
934     arrcond2 = (char*)memalign(Val1,Val2);
935     arrstart = (char*)memalign(Val1,Val2);
936     arrready = (char*)memalign(Val1,Val2);
937     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
938     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
939     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
940     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
941     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
942     /* initialize job structure */
943     for(i=0; i<PetscMaxThreads; i++) {
944       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
945       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
946       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
947       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
948       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
949     }
950     for(i=0; i<PetscMaxThreads; i++) {
951       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
952       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
953       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
954       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
955       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
956     }
957     job_tree.pfunc = NULL;
958     job_tree.pdata = (void**)malloc(N*sizeof(void*));
959     job_tree.startJob = PETSC_FALSE;
960     job_tree.eJobStat = JobInitiated;
961     pVal = (int*)malloc(N*sizeof(int));
962     /* allocate memory in the heap for the thread structure */
963     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
964     /* create threads */
965     for(i=0; i<N; i++) {
966       pVal[i] = i;
967       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
968       /* should check status */
969     }
970   }
971   return NULL;
972 }
973 
974 #undef __FUNCT__
975 #define __FUNCT__ "PetscThreadFinalize_Tree"
976 PetscErrorCode PetscThreadFinalize_Tree() {
977   int i,ierr;
978   void* jstatus;
979 
980   PetscFunctionBegin;
981 
982   if(PetscUseThreadPool) {
983     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
984     /* join the threads */
985     for(i=0; i<PetscMaxThreads; i++) {
986       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
987       /* do error checking*/
988     }
989     free(PetscThreadPoint);
990     free(arrmutex);
991     free(arrcond1);
992     free(arrcond2);
993     free(arrstart);
994     free(arrready);
995     free(job_tree.pdata);
996     free(pVal);
997   }
998   else {
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MainWait_Tree"
/*
   Blocks the 'main' thread until the tree pool is idle: thread 0 sets
   job_tree.eJobStat to JobCompleted and signals main_cond when the last
   job finishes.  The predicate is rechecked in a loop to handle spurious
   wakeups.  Waits on thread 0's mutex since thread 0 is the tree root.
*/
void MainWait_Tree() {
  int ierr;
  ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
  while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
    ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
  }
  ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
}
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MainJob_Tree"
/*
   Dispatches one job to the 'tree' pool: waits until the pool is idle,
   installs pFunc/data in job_tree, clears each thread's "started" flag,
   and wakes only thread 0 — the wake-up then cascades down the tree inside
   PetscThreadFunc_Tree.  Waits again afterwards (unless shutting down via
   FuncFinish) so the job is complete on return.  Without the pool, spawns
   n transient threads instead.  Returns nonzero if any thread ever failed.
   NOTE(review): ithreaderr is never reset, so one failure makes every later
   MainJob_* call report an error — confirm this sticky behavior is intended.
   NOTE(review): assigning job_tree.pdata = data drops the array malloc'd in
   PetscThreadInitialize_Tree; ownership of 'data' stays with the caller.
*/
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();
    job_tree.pfunc = pFunc;
    job_tree.pdata = data;
    job_tree.startJob = PETSC_TRUE;
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_tree.eJobStat = JobInitiated;
    /* only the root is signalled; it propagates the job to its subtree */
    ierr = pthread_cond_signal(job_tree.cond2array[0]);
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
    }
  }
  else {
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1044 /****  ****/
1045 
1046 /**** 'Main' Thread Pool Functions ****/
/*
   Worker routine for the 'main' thread pool: every worker reports directly
   to the 'main' thread (no hierarchy).  The worker marks itself ready,
   signals its cond1, then sleeps on cond2 until MainJob_Main clears its
   ready flag and signals it to run job_main.pfunc on its entry of
   job_main.pdata.  arg points at this thread's index in pVal[]; always
   returns NULL.
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;
  //printf("Thread %d In Main Thread Function\n",ThreadId);
  /* pin this thread to the core selected at initialization */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* the while loop needs to have an exit;
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish (which clears PetscThreadGo) */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait:
       waiting when you don't have to causes problems;
     also need to check the condition to ensure proper handling of spurious wakeups.
     The ready flag is cleared by MainJob_Main (not here) before it signals cond2. */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, has the lock */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* do your job; a NULL pdata means the same argument (NULL) for everyone */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* sticky global error flag, read by MainJob_* */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "PetscThreadInitialize_Main"
1101 void* PetscThreadInitialize_Main(PetscInt N) {
1102   PetscInt i,ierr;
1103   int status;
1104 
1105   if(PetscUseThreadPool) {
1106     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1107     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1108     arrmutex = (char*)memalign(Val1,Val2);
1109     arrcond1 = (char*)memalign(Val1,Val2);
1110     arrcond2 = (char*)memalign(Val1,Val2);
1111     arrstart = (char*)memalign(Val1,Val2);
1112     arrready = (char*)memalign(Val1,Val2);
1113     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1114     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1115     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1116     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1117     /* initialize job structure */
1118     for(i=0; i<PetscMaxThreads; i++) {
1119       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1120       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1121       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1122       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1123     }
1124     for(i=0; i<PetscMaxThreads; i++) {
1125       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1126       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1127       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1128       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1129     }
1130     job_main.pfunc = NULL;
1131     job_main.pdata = (void**)malloc(N*sizeof(void*));
1132     pVal = (int*)malloc(N*sizeof(int));
1133     /* allocate memory in the heap for the thread structure */
1134     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1135     /* create threads */
1136     for(i=0; i<N; i++) {
1137       pVal[i] = i;
1138       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1139       /* error check */
1140     }
1141   }
1142   else {
1143   }
1144   return NULL;
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "PetscThreadFinalize_Main"
1149 PetscErrorCode PetscThreadFinalize_Main() {
1150   int i,ierr;
1151   void* jstatus;
1152 
1153   PetscFunctionBegin;
1154 
1155   if(PetscUseThreadPool) {
1156     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1157     /* join the threads */
1158     for(i=0; i<PetscMaxThreads; i++) {
1159       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1160     }
1161     free(PetscThreadPoint);
1162     free(arrmutex);
1163     free(arrcond1);
1164     free(arrcond2);
1165     free(arrstart);
1166     free(arrready);
1167     free(job_main.pdata);
1168     free(pVal);
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MainWait_Main"
1175 void MainWait_Main() {
1176   int i,ierr;
1177   for(i=0; i<PetscMaxThreads; i++) {
1178     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1179     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1180       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1181     }
1182     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1183   }
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MainJob_Main"
/*
   Dispatches one job to the 'main' pool: waits until every worker is idle,
   installs pFunc/data in job_main, clears each worker's ready flag (its
   sleep predicate), and signals every worker's cond2.  Waits again
   afterwards (unless shutting down via FuncFinish) so the job is complete
   on return.  Without the pool, spawns n transient threads instead.
   Returns nonzero if any thread ever failed.
   NOTE(review): ithreaderr is never reset, so one failure makes every later
   MainJob_* call report an error — confirm this sticky behavior is intended.
*/
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait(); /* you know everyone is waiting to be signalled! */
    job_main.pfunc = pFunc;
    job_main.pdata = data;
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
    }
    /* tell the threads to go to work */
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_cond_signal(job_main.cond2array[i]);
    }
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
    }
  }
  else {
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1217 /****  ****/
1218 
1219 /**** Chain Thread Functions ****/
/*
   Worker routine for the 'chain' thread pool.  Threads form a linear chain:
   thread t's single subordinate is thread t+1; the last thread has none.
   A thread waits for its subordinate to be ready, reports ready itself
   (thread 0 signals 'main' via main_cond, others signal their boss via
   cond1), then sleeps on cond2 until told to run job_chain.pfunc on its
   entry of job_chain.pdata.  arg points at this thread's index in pVal[];
   always returns NULL.
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;      /* PETSC_TRUE for the tail of the chain (no subordinate) */
  cpu_set_t mset;
  //printf("Thread %d In Chain Thread Function\n",ThreadId);
  /* pin this thread to the core selected at initialization */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, has the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready*/
  }
  /* NOTE: mutexarray[ThreadId] stays held from here through the cond_wait loop below */
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /*  the while loop needs to have an exit;
     the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish (which clears PetscThreadGo) */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait:
       waiting when you don't have to causes problems;
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, has the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job; a NULL pdata means the same argument (NULL) for everyone */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* sticky global error flag, read by MainJob_* */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinate, wait for him to be ready;
         arrThreadStarted guards against a subordinate that never actually started */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
           upon return, has the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "PetscThreadInitialize_Chain"
1328 void* PetscThreadInitialize_Chain(PetscInt N) {
1329   PetscInt i,ierr;
1330   int status;
1331 
1332   if(PetscUseThreadPool) {
1333     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1334     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1335     arrmutex = (char*)memalign(Val1,Val2);
1336     arrcond1 = (char*)memalign(Val1,Val2);
1337     arrcond2 = (char*)memalign(Val1,Val2);
1338     arrstart = (char*)memalign(Val1,Val2);
1339     arrready = (char*)memalign(Val1,Val2);
1340     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1341     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1342     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1343     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1344     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1345     /* initialize job structure */
1346     for(i=0; i<PetscMaxThreads; i++) {
1347       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1348       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1349       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1350       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1351       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1352     }
1353     for(i=0; i<PetscMaxThreads; i++) {
1354       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1355       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1356       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1357       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1358       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1359     }
1360     job_chain.pfunc = NULL;
1361     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1362     job_chain.startJob = PETSC_FALSE;
1363     job_chain.eJobStat = JobInitiated;
1364     pVal = (int*)malloc(N*sizeof(int));
1365     /* allocate memory in the heap for the thread structure */
1366     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1367     /* create threads */
1368     for(i=0; i<N; i++) {
1369       pVal[i] = i;
1370       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1371       /* should check error */
1372     }
1373   }
1374   else {
1375   }
1376   return NULL;
1377 }
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "PetscThreadFinalize_Chain"
1382 PetscErrorCode PetscThreadFinalize_Chain() {
1383   int i,ierr;
1384   void* jstatus;
1385 
1386   PetscFunctionBegin;
1387 
1388   if(PetscUseThreadPool) {
1389     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1390     /* join the threads */
1391     for(i=0; i<PetscMaxThreads; i++) {
1392       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1393       /* should check error */
1394     }
1395     free(PetscThreadPoint);
1396     free(arrmutex);
1397     free(arrcond1);
1398     free(arrcond2);
1399     free(arrstart);
1400     free(arrready);
1401     free(job_chain.pdata);
1402     free(pVal);
1403   }
1404   else {
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MainWait_Chain"
/*
   Blocks the 'main' thread until the chain pool is idle: thread 0 (the
   foreman) sets job_chain.eJobStat to JobCompleted and signals main_cond
   when the last job finishes.  The predicate is rechecked in a loop to
   handle spurious wakeups.  Waits on thread 0's mutex since thread 0 heads
   the chain.
*/
void MainWait_Chain() {
  int ierr;
  ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
  while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
    ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
  }
  ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
}
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MainJob_Chain"
/*
   Dispatches one job to the 'chain' pool: waits until the pool is idle,
   installs pFunc/data in job_chain, clears each thread's "started" flag,
   and wakes only thread 0 — the wake-up then propagates down the chain
   inside PetscThreadFunc_Chain.  Waits again afterwards (unless shutting
   down via FuncFinish) so the job is complete on return.  Without the pool,
   spawns n transient threads instead.  Returns nonzero if any thread ever
   failed.
   NOTE(review): ithreaderr is never reset, so one failure makes every later
   MainJob_* call report an error — confirm this sticky behavior is intended.
   NOTE(review): assigning job_chain.pdata = data drops the array malloc'd in
   PetscThreadInitialize_Chain; ownership of 'data' stays with the caller.
*/
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();
    job_chain.pfunc = pFunc;
    job_chain.pdata = data;
    job_chain.startJob = PETSC_TRUE;
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_chain.eJobStat = JobInitiated;
    /* only the head of the chain is signalled; it passes the job along */
    ierr = pthread_cond_signal(job_chain.cond2array[0]);
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
    }
  }
  else {
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1450 /****  ****/
1451 
1452 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1453 /**** True Thread Functions ****/
/*
   Worker routine for the 'true' thread pool: a single shared mutex/condition
   plus a counting scheme (iNumReadyThreads/iNumJobThreads) instead of
   per-thread synchronization.  The Nth ready thread signals 'main'; workers
   sleep on job_true.cond until MainJob_True posts a job, then each claims a
   slot index iVal and runs job_true.pfunc on that entry of job_true.pdata.
   A barrier makes all job threads finish together before the counters are
   rebuilt.  arg points at this thread's index in pVal[]; always returns NULL.
   NOTE(review): the worker holds job_true.mutex across loop iterations
   (pthread_cond_wait re-acquires it); job_true.pfunc/pdata are read AFTER
   the unlock below, apparently relying on MainJob_True not changing them
   until the barrier completes — confirm.
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;
  //printf("Thread %d In True Pool Thread Function\n",ThreadId);
  /* pin this thread to the core selected at initialization */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    /* last thread to come up signals 'main' that the pool is ready */
    ierr = pthread_cond_signal(&main_cond);
  }
  /*the while loop needs to have an exit;
    the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish (which clears PetscThreadGo) */
  while(PetscThreadGo) {
    /*need to check the condition to ensure we don't have to wait:
      waiting when you don't have to causes problems;
     also need to wait if another thread sneaks in and messes with the predicate */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, has the lock */
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* claim a unique data slot: 0 for the first thread to get here, 1 for the next, ... */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* a NULL pdata means the same argument (NULL) for everyone */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* sticky global error flag, read by MainJob_* */
    }
    /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads;
      what happens if a thread finishes before they all start? BAD!
     what happens if a thread finishes before any else start? BAD! */
    pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
    /* reset job */
    if(PetscThreadGo) {
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
	/* signal the 'main' thread that the job is done! (only done once) */
	ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "PetscThreadInitialize_True"
1516 void* PetscThreadInitialize_True(PetscInt N) {
1517   PetscInt i;
1518   int status;
1519 
1520   if(PetscUseThreadPool) {
1521     pVal = (int*)malloc(N*sizeof(int));
1522     /* allocate memory in the heap for the thread structure */
1523     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1524     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1525     job_true.pdata = (void**)malloc(N*sizeof(void*));
1526     for(i=0; i<N; i++) {
1527       pVal[i] = i;
1528       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1529       /* error check to ensure proper thread creation */
1530       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1531       /* should check error */
1532     }
1533   }
1534   else {
1535   }
1536   return NULL;
1537 }
1538 
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "PetscThreadFinalize_True"
1542 PetscErrorCode PetscThreadFinalize_True() {
1543   int i,ierr;
1544   void* jstatus;
1545 
1546   PetscFunctionBegin;
1547 
1548   if(PetscUseThreadPool) {
1549     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1550     /* join the threads */
1551     for(i=0; i<PetscMaxThreads; i++) {
1552       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1553       /* should check error */
1554     }
1555     free(BarrPoint);
1556     free(PetscThreadPoint);
1557   }
1558   else {
1559   }
1560   PetscFunctionReturn(0);
1561 }
1562 
#undef __FUNCT__
#define __FUNCT__ "MainWait_True"
/*
   MainWait_True - Blocks the main thread until the whole pool is idle: all
   PetscMaxThreads workers have checked back in (iNumReadyThreads full) and no
   job is pending (startJob false).

   NOTE(review): pthread_cond_wait requires job_true.mutex to be held on
   entry, but this function never locks it -- the protocol evidently requires
   the caller to already own the mutex; confirm against the call sites. On
   return the mutex has been RELEASED (unlocked below), so subsequent reads
   of job_true by the caller are unprotected.
*/
void MainWait_True() {
  int ierr;
  /* re-check the predicate on every wakeup (guards against spurious wakeups
     and against another thread changing state before we reacquire) */
  while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
    ierr = pthread_cond_wait(&main_cond,&job_true.mutex); /* return value ignored */
  }
  ierr = pthread_mutex_unlock(&job_true.mutex); /* return value ignored */
}
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MainJob_True"
1575 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1576   int ierr;
1577   PetscErrorCode ijoberr = 0;
1578   if(PetscUseThreadPool) {
1579     MainWait();
1580     job_true.pfunc = pFunc;
1581     job_true.pdata = data;
1582     job_true.pbarr = &BarrPoint[n];
1583     job_true.iNumJobThreads = n;
1584     job_true.startJob = PETSC_TRUE;
1585     ierr = pthread_cond_broadcast(&job_true.cond);
1586     if(pFunc!=FuncFinish) {
1587       MainWait(); /* why wait after? guarantees that job gets done */
1588     }
1589   }
1590   else {
1591     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1592     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1593     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1594     free(apThread);
1595   }
1596   if(ithreaderr) {
1597     ijoberr = ithreaderr;
1598   }
1599   return ijoberr;
1600 }
1601 /**** NO THREAD POOL FUNCTION ****/
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MainJob_Spawn"
1604 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1605   PetscErrorCode ijoberr = 0;
1606 
1607   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1608   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1609   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1610   free(apThread);
1611 
1612   return ijoberr;
1613 }
1614 /****  ****/
1615 #endif
1616 
1617 void* FuncFinish(void* arg) {
1618   PetscThreadGo = PETSC_FALSE;
1619   return(0);
1620 }
1621 
1622 #endif
1623