xref: /petsc/src/sys/objects/init.c (revision 1a9a603c15cfc509b9e0bea9be85336d3bf85e09)
1 //new kds file - implements all thread pool versions
2 /*
3 
4    This file defines part of the initialization of PETSc
5 
6   This file uses regular malloc and free because it cannot know
7   what malloc is being used until it has already processed the input.
8 */
9 
10 #define _GNU_SOURCE
11 #include <sched.h>
12 #include <petscsys.h>        /*I  "petscsys.h"   I*/
13 #include <pthread.h>
14 #include <sys/sysinfo.h>
15 #include <unistd.h>
16 #if defined(PETSC_HAVE_STDLIB_H)
17 #include <stdlib.h>
18 #endif
19 #if defined(PETSC_HAVE_MALLOC_H)
20 #include <malloc.h>
21 #endif
22 #if defined(PETSC_HAVE_VALGRIND)
23 #include <valgrind/valgrind.h>
24 #endif
25 
/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool    PetscBeganMPI         = PETSC_FALSE;
PetscBool    PetscInitializeCalled = PETSC_FALSE;
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
/* Threading state: configured in PetscOptionsCheckInitial_Private() below from
   the -use_thread_pool and -thread_max options. */
PetscBool    PetscUseThreadPool    = PETSC_FALSE;
PetscBool    PetscThreadGo         = PETSC_TRUE;
PetscMPIInt  PetscGlobalRank = -1;
PetscMPIInt  PetscGlobalSize = -1;
/* Default worker-thread count; overridden by -thread_max. */
PetscMPIInt  PetscMaxThreads = 2;
/* Handles of the pool worker threads. */
pthread_t*   PetscThreadPoint;
pthread_barrier_t* BarrPoint;   //used by 'true' thread pool
/* NOTE(review): appears to record an error code raised inside a worker
   thread -- confirm against the pool implementations. */
PetscErrorCode ithreaderr = 0;
/* NOTE(review): presumably per-thread integer ids passed to pthread_create --
   confirm where it is allocated. */
int*         pVal;

#define CACHE_LINE_SIZE 64  //used by 'chain', 'main','tree' thread pools
/* Core index each worker thread is pinned to; filled from -thread<i> options. */
int* ThreadCoreAffinity;

/* Job progress states for the spin/signal protocols. */
typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  //used by 'chain','tree' thread pool

/* Shared job descriptor for the 'tree' pool: per-thread mutex/condition pairs
   plus the function/data of the current job and per-thread progress flags. */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* Shared job descriptor for the 'main' pool (no start/status bookkeeping). */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* Shared job descriptor for the 'chain' pool (same layout as sjob_tree). */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
/* Shared job descriptor for the 'true' pool: one mutex/condition for all
   workers plus a barrier, counters, and a start flag. */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  //used by 'true', 'chain','tree' thread pools
/* Raw storage blocks for the per-thread synchronization objects; carved into
   CACHE_LINE_SIZE-spaced slots by the pool init routines. */
char* arrmutex; //used by 'chain','main','tree' thread pools
char* arrcond1; //used by 'chain','main','tree' thread pools
char* arrcond2; //used by 'chain','main','tree' thread pools
char* arrstart; //used by 'chain','main','tree' thread pools
char* arrready; //used by 'chain','main','tree' thread pools

/* Function Pointers */
/* Active pool implementation; selected by the -pool option in
   PetscOptionsCheckInitial_Private() from the four variants declared below. */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/

void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
136 
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
/* Provide the complex MPI datatypes when the MPI implementation lacks them;
   they are presumably created during PetscInitialize(). */
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
MPI_Datatype   MPI_C_COMPLEX;
#endif
PetscScalar    PETSC_i;
#else
PetscScalar    PETSC_i = 0.0;
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;
#endif
/* Pair datatypes used by PETSc's combined-value reductions. */
MPI_Datatype   MPIU_2SCALAR = 0;
MPI_Datatype   MPIU_2INT = 0;

/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;

/*
       Function that is called to display all error messages
*/
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
#if defined(PETSC_HAVE_MATLAB_ENGINE)
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
  This is needed to turn on/off cusp synchronization */
PetscBool   synchronizeCUSP = PETSC_FALSE;

/* ------------------------------------------------------------------------------*/
/*
   Optional file where all PETSc output from various prints is saved
*/
FILE *petsc_history = PETSC_NULL;
182 
183 #undef __FUNCT__
184 #define __FUNCT__ "PetscOpenHistoryFile"
185 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
186 {
187   PetscErrorCode ierr;
188   PetscMPIInt    rank,size;
189   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
190   char           version[256];
191 
192   PetscFunctionBegin;
193   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
194   if (!rank) {
195     char        arch[10];
196     int         err;
197     PetscViewer viewer;
198 
199     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
200     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
201     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
202     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
203     if (filename) {
204       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
205     } else {
206       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
207       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
208       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
209     }
210 
211     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
212     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
213     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
214     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
215     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
216     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
217     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
218     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
219     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
220     err = fflush(*fd);
221     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
222   }
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "PetscCloseHistoryFile"
228 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
229 {
230   PetscErrorCode ierr;
231   PetscMPIInt    rank;
232   char           date[64];
233   int            err;
234 
235   PetscFunctionBegin;
236   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
237   if (!rank) {
238     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
239     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
240     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
241     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
242     err = fflush(*fd);
243     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
244     err = fclose(*fd);
245     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
246   }
247   PetscFunctionReturn(0);
248 }
249 
250 /* ------------------------------------------------------------------------------*/
251 
252 /*
253    This is ugly and probably belongs somewhere else, but I want to
254   be able to put a true MPI abort error handler with command line args.
255 
256     This is so MPI errors in the debugger will leave all the stack
257   frames. The default MP_Abort() cleans up and exits thus providing no useful information
258   in the debugger hence we call abort() instead of MPI_Abort().
259 */
260 
#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/* MPI error-handler callback: print the MPI error code and abort().
   abort() (rather than MPI_Abort()) is used deliberately so a debugger keeps
   all stack frames -- see the comment block above. */
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();
}
269 
#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/* MPI error-handler callback: print the MPI error code and try to attach a
   debugger; falls back to MPI_Abort() if the attach fails. The return code of
   PetscAttachDebugger() is checked manually (no CHKERRQ) because this is a
   void callback with no way to propagate an error. */
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}
283 
#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode  PetscEnd(void)
{
  PetscFunctionBegin;
  /* any PetscFinalize() error is moot since the process exits regardless */
  PetscFinalize();
  exit(0);
  return 0;  /* unreachable; keeps compilers happy about the return type */
}
308 
/* Set by -options_gui in PetscOptionsCheckInitial_Private() below. */
PetscBool    PetscOptionsPublish = PETSC_FALSE;
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
extern PetscBool  petscsetmallocvisited;
/* Machine name for -on_error_emacs; handed to PetscEmacsClientErrorHandler. */
static char       emacsmachinename[256];

/* Optional hooks that let a higher-level package print its own version/help
   text before PETSc's; see PetscSetHelpVersionFunctions(). */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "PetscSetHelpVersionFunctions"
319 /*@C
320    PetscSetHelpVersionFunctions - Sets functions that print help and version information
321    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
322    This routine enables a "higher-level" package that uses PETSc to print its messages first.
323 
324    Input Parameter:
325 +  help - the help function (may be PETSC_NULL)
326 -  version - the version function (may be PETSC_NULL)
327 
328    Level: developer
329 
330    Concepts: package help message
331 
332 @*/
333 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
334 {
335   PetscFunctionBegin;
336   PetscExternalHelpFunction    = help;
337   PetscExternalVersionFunction = version;
338   PetscFunctionReturn(0);
339 }
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
343 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
344 {
345   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
346   MPI_Comm       comm = PETSC_COMM_WORLD;
347   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
348   PetscErrorCode ierr;
349   PetscReal      si;
350   int            i;
351   PetscMPIInt    rank;
352   char           version[256];
353 
354   PetscFunctionBegin;
355   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
356 
357   /*
358       Setup the memory management; support for tracing malloc() usage
359   */
360   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
361 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
362   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
363   if ((!flg2 || flg1) && !petscsetmallocvisited) {
364 #if defined(PETSC_HAVE_VALGRIND)
365     if (flg2 || !(RUNNING_ON_VALGRIND)) {
366       /* turn off default -malloc if valgrind is being used */
367 #endif
368       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
369 #if defined(PETSC_HAVE_VALGRIND)
370     }
371 #endif
372   }
373 #else
374   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
375   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
376   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
377 #endif
378   if (flg3) {
379     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
380   }
381   flg1 = PETSC_FALSE;
382   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
383   if (flg1) {
384     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
385     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
386   }
387 
388   flg1 = PETSC_FALSE;
389   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
390   if (!flg1) {
391     flg1 = PETSC_FALSE;
392     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
393   }
394   if (flg1) {
395     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
396   }
397 
398   /*
399       Set the display variable for graphics
400   */
401   ierr = PetscSetDisplay();CHKERRQ(ierr);
402 
403   /*
404       Determine whether user specified maximum number of threads
405    */
406   ierr = PetscOptionsHasName(PETSC_NULL,"-thread_max",&flg1);CHKERRQ(ierr);
407   if(flg1) {
408     ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
409   }
410 
411   /*
412       Determine whether to use thread pool
413    */
414   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
415   if(flg1) {
416     PetscUseThreadPool = PETSC_TRUE;
417     PetscInt N_CORES = get_nprocs();
418     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
419     char tstr[9];
420     char tbuf[2];
421     strcpy(tstr,"-thread");
422     for(i=0;i<PetscMaxThreads;i++) {
423       ThreadCoreAffinity[i] = i;  //default
424       sprintf(tbuf,"%d",i);
425       strcat(tstr,tbuf);
426       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
427       if(flg1) {
428         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
429         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; //check on the user
430       }
431       tstr[7] = '\0';
432     }
433     //get the thread pool type
434     PetscInt ipool = 0;
435     ierr = PetscOptionsGetInt(PETSC_NULL,"-pool",&ipool,PETSC_NULL);CHKERRQ(ierr);
436     switch(ipool) {
437     case 1:
438       PetscThreadFunc       = &PetscThreadFunc_Tree;
439       PetscThreadInitialize = &PetscThreadInitialize_Tree;
440       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
441       MainWait              = &MainWait_Tree;
442       MainJob               = &MainJob_Tree;
443       break;
444     case 2:
445       PetscThreadFunc       = &PetscThreadFunc_Main;
446       PetscThreadInitialize = &PetscThreadInitialize_Main;
447       PetscThreadFinalize   = &PetscThreadFinalize_Main;
448       MainWait              = &MainWait_Main;
449       MainJob               = &MainJob_Main;
450       break;
451     case 3:
452       PetscThreadFunc       = &PetscThreadFunc_Chain;
453       PetscThreadInitialize = &PetscThreadInitialize_Chain;
454       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
455       MainWait              = &MainWait_Chain;
456       MainJob               = &MainJob_Chain;
457       break;
458     default:
459       PetscThreadFunc       = &PetscThreadFunc_True;
460       PetscThreadInitialize = &PetscThreadInitialize_True;
461       PetscThreadFinalize   = &PetscThreadFinalize_True;
462       MainWait              = &MainWait_True;
463       MainJob               = &MainJob_True;
464       break;
465     }
466   }
467   else {
468     //need to define these in the case on 'no threads' or 'thread create/destroy'
469     //could take any of the above versions
470     PetscThreadInitialize = &PetscThreadInitialize_True;
471     PetscThreadFinalize   = &PetscThreadFinalize_True;
472     MainJob               = &MainJob_True;
473   }
474   PetscThreadInitialize(PetscMaxThreads);
475   /*
476       Print the PETSc version information
477   */
478   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
479   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
480   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
481   if (flg1 || flg2 || flg3){
482 
483     /*
484        Print "higher-level" package version message
485     */
486     if (PetscExternalVersionFunction) {
487       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
488     }
489 
490     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
491     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
492 ------------------------------\n");CHKERRQ(ierr);
493     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
494     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
495     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
496     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
497     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
498     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
499     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
500 ------------------------------\n");CHKERRQ(ierr);
501   }
502 
503   /*
504        Print "higher-level" package help message
505   */
506   if (flg3){
507     if (PetscExternalHelpFunction) {
508       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
509     }
510   }
511 
512   /*
513       Setup the error handling
514   */
515   flg1 = PETSC_FALSE;
516   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
517   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
518   flg1 = PETSC_FALSE;
519   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
520   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
521   flg1 = PETSC_FALSE;
522   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
523   if (flg1) {
524     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
525   }
526   flg1 = PETSC_FALSE;
527   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
528   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
529   flg1 = PETSC_FALSE;
530   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
531   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
532 
533   /*
534       Setup debugger information
535   */
536   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
537   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
538   if (flg1) {
539     MPI_Errhandler err_handler;
540 
541     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
542     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
543     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
544     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
545   }
546   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
547   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
548   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
549   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
550   if (flg1 || flg2) {
551     PetscMPIInt    size;
552     PetscInt       lsize,*nodes;
553     MPI_Errhandler err_handler;
554     /*
555        we have to make sure that all processors have opened
556        connections to all other processors, otherwise once the
557        debugger has stated it is likely to receive a SIGUSR1
558        and kill the program.
559     */
560     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
561     if (size > 2) {
562       PetscMPIInt dummy = 0;
563       MPI_Status  status;
564       for (i=0; i<size; i++) {
565         if (rank != i) {
566           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
567         }
568       }
569       for (i=0; i<size; i++) {
570         if (rank != i) {
571           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
572         }
573       }
574     }
575     /* check if this processor node should be in debugger */
576     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
577     lsize = size;
578     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
579     if (flag) {
580       for (i=0; i<lsize; i++) {
581         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
582       }
583     }
584     if (!flag) {
585       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
586       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
587       if (flg1) {
588         ierr = PetscAttachDebugger();CHKERRQ(ierr);
589       } else {
590         ierr = PetscStopForDebugger();CHKERRQ(ierr);
591       }
592       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
593       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
594     }
595     ierr = PetscFree(nodes);CHKERRQ(ierr);
596   }
597 
598   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
599   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
600 
601 #if defined(PETSC_USE_SOCKET_VIEWER)
602   /*
603     Activates new sockets for zope if needed
604   */
605   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
606   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
607   if (flgz){
608     int  sockfd;
609     char hostname[256];
610     char username[256];
611     int  remoteport = 9999;
612 
613     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
614     if (!hostname[0]){
615       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
616     }
617     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
618     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
619     PETSC_ZOPEFD = fdopen(sockfd, "w");
620     if (flgzout){
621       PETSC_STDOUT = PETSC_ZOPEFD;
622       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
623       fprintf(PETSC_STDOUT, "<<<start>>>");
624     } else {
625       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
626       fprintf(PETSC_ZOPEFD, "<<<start>>>");
627     }
628   }
629 #endif
630 #if defined(PETSC_USE_SERVER)
631   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
632   if (flgz){
633     PetscInt port = PETSC_DECIDE;
634     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
635     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
636   }
637 #endif
638 
639   /*
640         Setup profiling and logging
641   */
642 #if defined (PETSC_USE_INFO)
643   {
644     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
645     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
646     if (flg1 && logname[0]) {
647       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
648     } else if (flg1) {
649       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
650     }
651   }
652 #endif
653 #if defined(PETSC_USE_LOG)
654   mname[0] = 0;
655   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
656   if (flg1) {
657     if (mname[0]) {
658       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
659     } else {
660       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
661     }
662   }
663 #if defined(PETSC_HAVE_MPE)
664   flg1 = PETSC_FALSE;
665   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
666   if (flg1) PetscLogMPEBegin();
667 #endif
668   flg1 = PETSC_FALSE;
669   flg2 = PETSC_FALSE;
670   flg3 = PETSC_FALSE;
671   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
672   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
673   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
674   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
675   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
676   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
677 
678   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
679   if (flg1) {
680     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
681     FILE *file;
682     if (mname[0]) {
683       sprintf(name,"%s.%d",mname,rank);
684       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
685       file = fopen(fname,"w");
686       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
687     } else {
688       file = PETSC_STDOUT;
689     }
690     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
691   }
692 #endif
693 
694   /*
695       Setup building of stack frames for all function calls
696   */
697 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
698   ierr = PetscStackCreate();CHKERRQ(ierr);
699 #endif
700 
701   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
702 
703   /*
704        Print basic help message
705   */
706   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
707   if (flg1) {
708     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
709     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
710     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
711     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
712     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
713     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
714     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
715     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
716     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
717     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
718     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
719     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
720     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
721     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
722     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
723     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
724     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
725     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
726     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
727     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
728     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
729     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
730     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
731     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
732     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
733     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
734     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
735     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
736     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
737     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
738     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
739     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
741 #if defined(PETSC_USE_LOG)
742     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
745 #if defined(PETSC_HAVE_MPE)
746     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
747 #endif
748     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
749 #endif
750     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
754   }
755 
756   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
757   if (flg1) {
758     ierr = PetscSleep(si);CHKERRQ(ierr);
759   }
760 
761   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
762   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
763   if (f) {
764     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
765   }
766 
767 #if defined(PETSC_HAVE_CUSP)
768   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
769   if (flg3) flg1 = PETSC_TRUE;
770   else flg1 = PETSC_FALSE;
771   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
772   if (flg1) synchronizeCUSP = PETSC_TRUE;
773 #endif
774 
775   PetscFunctionReturn(0);
776 }
777 
778 /**** 'Tree' Thread Pool Functions ****/
/*
   PetscThreadFunc_Tree - worker routine for the 'tree' thread pool.  Threads
   form a binary tree (Mary = 2): thread i is the boss of threads 2*i+1 and
   2*i+2.  Readiness propagates leaf-to-root via cond1array; work propagates
   root-to-leaf via cond2array after 'main' signals thread 0 (MainJob_Tree).
   Only the root (thread 0) signals the 'main' thread, via main_cond.

   arg - pointer to an int holding this thread's id (0 .. PetscMaxThreads-1)

   Returns NULL.  Loops until FuncFinish clears PetscThreadGo.
*/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;  /* PETSC_TRUE when this thread is a leaf (no subworkers) */
  cpu_set_t mset;
  /* pin this thread to the core listed for it in ThreadCoreAffinity */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* leaf test: a thread with no first child (2*id+1 out of range) is a leaf */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
             upon return, has the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    /* the whole tree is ready once its root is (root waited on its subtree) */
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* loop exit: the 'main' thread terminates all the threads by dispatching
     FuncFinish, which clears PetscThreadGo */
  while(PetscThreadGo) {
    /* check the predicate so we neither wait needlessly nor resume on a
       spurious wakeup */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
        /* upon entry, atomically releases the lock and blocks;
           upon return, has the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
        *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
        SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job; pdata==NULL means a data-less job (e.g. FuncFinish) */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure; MainJob reports it to the caller */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* wait for each subordinate to be ready AND to have started this
           round: arrThreadStarted distinguishes "still idle from the last
           round" from "done with this round" */
        for(i=1;i<=Mary;i++) {
          SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks;
                 upon return, has the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
        }
        /* your subordinates are now ready */
      }
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
898 
899 #undef __FUNCT__
900 #define __FUNCT__ "PetscThreadInitialize_Tree"
901 void* PetscThreadInitialize_Tree(PetscInt N) {
902   PetscInt i,ierr;
903   int status;
904 
905   if(PetscUseThreadPool) {
906     size_t Val1 = (size_t)CACHE_LINE_SIZE;
907     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
908     arrmutex = (char*)memalign(Val1,Val2);
909     arrcond1 = (char*)memalign(Val1,Val2);
910     arrcond2 = (char*)memalign(Val1,Val2);
911     arrstart = (char*)memalign(Val1,Val2);
912     arrready = (char*)memalign(Val1,Val2);
913     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
914     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
915     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
916     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
917     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
918     //initialize job structure
919     for(i=0; i<PetscMaxThreads; i++) {
920       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
921       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
922       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
923       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
924       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
925     }
926     for(i=0; i<PetscMaxThreads; i++) {
927       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
928       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
929       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
930       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
931       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
932     }
933     job_tree.pfunc = NULL;
934     job_tree.pdata = (void**)malloc(N*sizeof(void*));
935     job_tree.startJob = PETSC_FALSE;
936     job_tree.eJobStat = JobInitiated;
937     pVal = (int*)malloc(N*sizeof(int));
938     //allocate memory in the heap for the thread structure
939     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
940     //create threads
941     for(i=0; i<N; i++) {
942       pVal[i] = i;
943       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
944       //error check
945     }
946   }
947   else {
948     //do nothing
949   }
950   return NULL;
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "PetscThreadFinalize_Tree"
955 PetscErrorCode PetscThreadFinalize_Tree() {
956   int i,ierr;
957   void* jstatus;
958 
959   PetscFunctionBegin;
960 
961   if(PetscUseThreadPool) {
962     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
963     //join the threads
964     for(i=0; i<PetscMaxThreads; i++) {
965       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
966       //do error checking
967     }
968     free(PetscThreadPoint);
969     free(arrmutex);
970     free(arrcond1);
971     free(arrcond2);
972     free(arrstart);
973     free(arrready);
974     free(job_tree.pdata);
975     free(pVal);
976   }
977   else {
978   }
979   PetscFunctionReturn(0);
980 }
981 
#undef __FUNCT__
#define __FUNCT__ "MainWait_Tree"
/*
   MainWait_Tree - called by the 'main' thread; blocks on main_cond until the
   root worker (thread 0) reports the current job completed (eJobStat ==
   JobCompleted) and no new job is pending (startJob is false).  Both flags
   are protected by thread 0's mutex, which is released before returning.
*/
void MainWait_Tree() {
  int ierr;
  ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
  while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
    ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
  }
  ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
}
992 
#undef __FUNCT__
#define __FUNCT__ "MainJob_Tree"
/*
   MainJob_Tree - called by the 'main' thread to run pFunc on the 'tree' pool.

   pFunc - job routine each worker executes
   data  - array of per-thread argument pointers, or NULL for a data-less job
   n     - number of threads used by the non-pool fallback path

   With the pool enabled only thread 0 is signalled; the workers fan the
   wakeup out through the tree (see PetscThreadFunc_Tree).  Without the pool,
   n fresh threads are created and joined via PetscThreadRun/PetscThreadStop.

   Returns the global thread error flag (nonzero if any worker's job failed).
*/
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();  /* make sure no previous job is still in flight */
    job_tree.pfunc = pFunc;
    job_tree.pdata = data;
    job_tree.startJob = PETSC_TRUE;
    /* clear 'started' flags so bosses can tell this round's completion apart
       from the last one */
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_tree.eJobStat = JobInitiated;
    /* wake the root; it wakes its subworkers in turn */
    ierr = pthread_cond_signal(job_tree.cond2array[0]);
    if(pFunc!=FuncFinish) {
      /* wait again: guarantees the job is done before any result collection */
      MainWait();
    }
  }
  else {
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1023 /****  ****/
1024 
1025 /**** 'Main' Thread Pool Functions ****/
/*
   PetscThreadFunc_Main - worker routine for the 'main' thread pool model.
   Every worker synchronizes directly with the 'main' thread: it raises its
   own ready flag and signals cond1array[ThreadId]; 'main' dispatches work by
   clearing the flag and signalling cond2array[ThreadId] (see MainJob_Main).

   arg - pointer to an int holding this thread's id (0 .. PetscMaxThreads-1)

   Returns NULL.  Loops until FuncFinish clears PetscThreadGo.
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;
  /* pin this thread to the core listed for it in ThreadCoreAffinity */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS ('main') that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* loop exit: the 'main' thread terminates all the threads by dispatching
     FuncFinish, which clears PetscThreadGo */
  while(PetscThreadGo) {
    /* check the predicate so we neither wait needlessly nor resume on a
       spurious wakeup; MainJob_Main clears the ready flag before signalling */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
        /* upon entry, atomically releases the lock and blocks;
           upon return, has the lock */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* do your job; pdata==NULL means a data-less job (e.g. FuncFinish) */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure; MainJob reports it to the caller */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "PetscThreadInitialize_Main"
1081 void* PetscThreadInitialize_Main(PetscInt N) {
1082   PetscInt i,ierr;
1083   int status;
1084 
1085   if(PetscUseThreadPool) {
1086     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1087     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1088     arrmutex = (char*)memalign(Val1,Val2);
1089     arrcond1 = (char*)memalign(Val1,Val2);
1090     arrcond2 = (char*)memalign(Val1,Val2);
1091     arrstart = (char*)memalign(Val1,Val2);
1092     arrready = (char*)memalign(Val1,Val2);
1093     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1094     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1095     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1096     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1097     //initialize job structure
1098     for(i=0; i<PetscMaxThreads; i++) {
1099       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1100       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1101       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1102       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1103     }
1104     for(i=0; i<PetscMaxThreads; i++) {
1105       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1106       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1107       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1108       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1109     }
1110     job_main.pfunc = NULL;
1111     job_main.pdata = (void**)malloc(N*sizeof(void*));
1112     pVal = (int*)malloc(N*sizeof(int));
1113     //allocate memory in the heap for the thread structure
1114     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1115     //create threads
1116     for(i=0; i<N; i++) {
1117       pVal[i] = i;
1118       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1119       //error check
1120     }
1121   }
1122   else {
1123   }
1124   return NULL;
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "PetscThreadFinalize_Main"
1129 PetscErrorCode PetscThreadFinalize_Main() {
1130   int i,ierr;
1131   void* jstatus;
1132 
1133   PetscFunctionBegin;
1134 
1135   if(PetscUseThreadPool) {
1136     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1137     //join the threads
1138     for(i=0; i<PetscMaxThreads; i++) {
1139       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1140       //do error checking
1141     }
1142     free(PetscThreadPoint);
1143     free(arrmutex);
1144     free(arrcond1);
1145     free(arrcond2);
1146     free(arrstart);
1147     free(arrready);
1148     free(job_main.pdata);
1149     free(pVal);
1150   }
1151   else {
1152   }
1153   PetscFunctionReturn(0);
1154 }
1155 
#undef __FUNCT__
#define __FUNCT__ "MainWait_Main"
/*
   MainWait_Main - called by the 'main' thread; blocks until every worker has
   raised its ready flag.  Each flag is checked under that worker's own mutex;
   workers signal cond1array[i] after setting the flag.
*/
void MainWait_Main() {
  int i,ierr;
  for(i=0; i<PetscMaxThreads; i++) {
    ierr = pthread_mutex_lock(job_main.mutexarray[i]);
    while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
      ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
  }
}
1168 
#undef __FUNCT__
#define __FUNCT__ "MainJob_Main"
/*
   MainJob_Main - called by the 'main' thread to run pFunc on the 'main'-model
   pool: 'main' signals every worker individually.

   pFunc - job routine each worker executes
   data  - array of per-thread argument pointers, or NULL for a data-less job
   n     - number of threads used by the non-pool fallback path

   Returns the global thread error flag (nonzero if any worker's job failed).
*/
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait(); /* every worker is now idle and waiting to be signalled */
    job_main.pfunc = pFunc;
    job_main.pdata = data;
    /* clear the ready flags first so the MainWait() below really blocks
       until every worker has done the new job and re-raised its flag */
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_main.arrThreadReady[i]) = PETSC_FALSE;
    }
    /* tell the threads to go to work */
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_cond_signal(job_main.cond2array[i]);
    }
    if(pFunc!=FuncFinish) {
      /* wait again: guarantees the job is done before any result collection */
      MainWait();
    }
  }
  else {
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1200 /****  ****/
1201 
1202 /**** Chain Thread Functions ****/
/*
   PetscThreadFunc_Chain - worker routine for the 'chain' thread pool.
   Thread i is the boss of thread i+1: readiness ripples up the chain via
   cond1array and work ripples down via cond2array after 'main' signals
   thread 0 (see MainJob_Chain).  The tail thread (id PetscMaxThreads-1) has
   no subworker ("PeeOn").  Only thread 0 signals 'main', via main_cond.

   arg - pointer to an int holding this thread's id (0 .. PetscMaxThreads-1)

   Returns NULL.  Loops until FuncFinish clears PetscThreadGo.
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;  /* PETSC_TRUE when this is the tail thread of the chain */
  cpu_set_t mset;
  /* pin this thread to the core listed for it in ThreadCoreAffinity */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready */
  }
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    /* the whole chain is ready once its head is (head waited on the rest) */
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /* loop exit: the 'main' thread terminates all the threads by dispatching
     FuncFinish, which clears PetscThreadGo */
  while(PetscThreadGo) {
    /* check the predicate so we neither wait needlessly nor resume on a
       spurious wakeup */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
        /* upon entry, atomically releases the lock and blocks;
           upon return, has the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
        *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job; pdata==NULL means a data-less job (e.g. FuncFinish) */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure; MainJob reports it to the caller */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* wait for the subordinate to be ready AND to have started this
           round: arrThreadStarted distinguishes "still idle from the last
           round" from "done with this round" */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
             upon return, has the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "PetscThreadInitialize_Chain"
1311 void* PetscThreadInitialize_Chain(PetscInt N) {
1312   PetscInt i,ierr;
1313   int status;
1314 
1315   if(PetscUseThreadPool) {
1316     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1317     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1318     arrmutex = (char*)memalign(Val1,Val2);
1319     arrcond1 = (char*)memalign(Val1,Val2);
1320     arrcond2 = (char*)memalign(Val1,Val2);
1321     arrstart = (char*)memalign(Val1,Val2);
1322     arrready = (char*)memalign(Val1,Val2);
1323     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1324     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1325     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1326     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1327     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1328     //initialize job structure
1329     for(i=0; i<PetscMaxThreads; i++) {
1330       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1331       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1332       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1333       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1334       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1335     }
1336     for(i=0; i<PetscMaxThreads; i++) {
1337       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1338       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1339       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1340       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1341       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1342     }
1343     job_chain.pfunc = NULL;
1344     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1345     job_chain.startJob = PETSC_FALSE;
1346     job_chain.eJobStat = JobInitiated;
1347     pVal = (int*)malloc(N*sizeof(int));
1348     //allocate memory in the heap for the thread structure
1349     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1350     //create threads
1351     for(i=0; i<N; i++) {
1352       pVal[i] = i;
1353       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1354       //error check
1355     }
1356   }
1357   else {
1358   }
1359   return NULL;
1360 }
1361 
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "PetscThreadFinalize_Chain"
1365 PetscErrorCode PetscThreadFinalize_Chain() {
1366   int i,ierr;
1367   void* jstatus;
1368 
1369   PetscFunctionBegin;
1370 
1371   if(PetscUseThreadPool) {
1372     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1373     //join the threads
1374     for(i=0; i<PetscMaxThreads; i++) {
1375       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1376       //do error checking
1377     }
1378     free(PetscThreadPoint);
1379     free(arrmutex);
1380     free(arrcond1);
1381     free(arrcond2);
1382     free(arrstart);
1383     free(arrready);
1384     free(job_chain.pdata);
1385     free(pVal);
1386   }
1387   else {
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
#undef __FUNCT__
#define __FUNCT__ "MainWait_Chain"
/*
   MainWait_Chain - called by the 'main' thread; blocks on main_cond until the
   head worker (thread 0) reports the current job completed (eJobStat ==
   JobCompleted) and no new job is pending (startJob is false).  Both flags
   are protected by thread 0's mutex, which is released before returning.
*/
void MainWait_Chain() {
  int ierr;
  ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
  while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
    ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
  }
  ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
}
1402 
#undef __FUNCT__
#define __FUNCT__ "MainJob_Chain"
/*
   MainJob_Chain - called by the 'main' thread to run pFunc on the 'chain'
   pool.

   pFunc - job routine each worker executes
   data  - array of per-thread argument pointers, or NULL for a data-less job
   n     - number of threads used by the non-pool fallback path

   With the pool enabled only thread 0 is signalled; the wakeup ripples down
   the chain (see PetscThreadFunc_Chain).  Without the pool, n fresh threads
   are created and joined via PetscThreadRun/PetscThreadStop.

   Returns the global thread error flag (nonzero if any worker's job failed).
*/
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();  /* make sure no previous job is still in flight */
    job_chain.pfunc = pFunc;
    job_chain.pdata = data;
    job_chain.startJob = PETSC_TRUE;
    /* clear 'started' flags so bosses can tell this round's completion apart
       from the last one */
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_chain.eJobStat = JobInitiated;
    /* wake the head of the chain; it wakes its subworker in turn */
    ierr = pthread_cond_signal(job_chain.cond2array[0]);
    if(pFunc!=FuncFinish) {
      /* wait again: guarantees the job is done before any result collection */
      MainWait();
    }
  }
  else {
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1433 /****  ****/
1434 
1435 /**** True Thread Functions ****/
/*
   PetscThreadFunc_True - worker routine for the 'true' thread pool.  All
   workers sleep on a single condition variable (job_true.cond); MainJob_True
   posts a job (iNumJobThreads = n, startJob) and broadcasts, and the first n
   workers to wake claim it.  A barrier sized for the job (job_true.pbarr)
   holds the workers together until all have finished, protecting the
   iNumReadyThreads bookkeeping.

   arg - pointer to an int holding this thread's id (0 .. PetscMaxThreads-1)

   Returns NULL.  Loops until FuncFinish clears PetscThreadGo.
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;
  /* pin this thread to the core listed for it in ThreadCoreAffinity */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    /* last thread to check in wakes 'main', which waits for a full pool */
    ierr = pthread_cond_signal(&main_cond);
  }
  /* loop exit: the 'main' thread terminates all the threads by broadcasting
     FuncFinish, which clears PetscThreadGo */
  while(PetscThreadGo) {
    /* check the predicate so we neither wait needlessly nor resume on a
       spurious wakeup; another thread may also have consumed the job already */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* claim a data slot based on sign-out order, not on ThreadId */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* do your job; pdata==NULL means a data-less job (e.g. FuncFinish) */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure; MainJob reports it to the caller */
    }
    /* the barrier is necessary BECAUSE of the iNumReadyThreads bookkeeping:
       if one thread finished before the others started, the ready count (and
       hence iVal) would be corrupted */
    pthread_barrier_wait(job_true.pbarr); /* ensures all job threads are finished */
    /* reset job */
    if(PetscThreadGo) {
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
        /* signal the 'main' thread that the job is done! (only done once) */
        ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "PetscThreadInitialize_True"
1498 void* PetscThreadInitialize_True(PetscInt N) {
1499   PetscInt i;
1500   int status;
1501 
1502   if(PetscUseThreadPool) {
1503     pVal = (int*)malloc(N*sizeof(int));
1504     //allocate memory in the heap for the thread structure
1505     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1506     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); //BarrPoint[0] makes no sense, don't use it!
1507     job_true.pdata = (void**)malloc(N*sizeof(void*));
1508     for(i=0; i<N; i++) {
1509       pVal[i] = i;
1510       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1511       //error check to ensure proper thread creation
1512       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1513       //error check
1514     }
1515   }
1516   else {
1517   }
1518   return NULL;
1519 }
1520 
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "PetscThreadFinalize_True"
1524 PetscErrorCode PetscThreadFinalize_True() {
1525   int i,ierr;
1526   void* jstatus;
1527 
1528   PetscFunctionBegin;
1529 
1530   if(PetscUseThreadPool) {
1531     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1532     //join the threads
1533     for(i=0; i<PetscMaxThreads; i++) {
1534       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1535       //do error checking
1536     }
1537     free(BarrPoint);
1538     free(PetscThreadPoint);
1539   }
1540   else {
1541   }
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MainWait_True"
1547 void MainWait_True() {
1548   int ierr;
1549   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1550     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1551   }
1552   ierr = pthread_mutex_unlock(&job_true.mutex);
1553 }
1554 
#undef __FUNCT__
#define __FUNCT__ "MainJob_True"
/*
   MainJob_True - called by the 'main' thread to run pFunc on the 'true'
   pool: the job is posted (iNumJobThreads = n, startJob) and job_true.cond is
   broadcast; the first n workers to wake claim it (PetscThreadFunc_True).

   pFunc - job routine to execute
   data  - array of n per-thread argument pointers, or NULL for a data-less job
   n     - number of workers that should take the job; also selects the
           barrier BarrPoint[n] sized for exactly n participants

   Returns the global thread error flag (nonzero if any worker's job failed).

   NOTE(review): the job_true fields are written here after MainWait() has
   released job_true.mutex, so these stores are not covered by the mutex the
   workers lock — verify this ordering is intended.
*/
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
  int ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();  /* make sure no previous job is still in flight */
    job_true.pfunc = pFunc;
    job_true.pdata = data;
    job_true.pbarr = &BarrPoint[n];  /* barrier for exactly n participants */
    job_true.iNumJobThreads = n;
    job_true.startJob = PETSC_TRUE;
    ierr = pthread_cond_broadcast(&job_true.cond);
    if(pFunc!=FuncFinish) {
      /* wait again: guarantees the job is done before any result collection */
      MainWait();
    }
  }
  else {
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1583 /****  ****/
1584 
1585 void* FuncFinish(void* arg) {
1586   PetscThreadGo = PETSC_FALSE;
1587   return(0);
1588 }
1589