xref: /petsc/src/sys/objects/init.c (revision 51d315f7f51405b454a6b2ffbc3c7276a5e2a085)
1 //new kds file - implements the M-ary tree
2 /*
3 
4    This file defines part of the initialization of PETSc
5 
6   This file uses regular malloc and free because it cannot know
7   what malloc is being used until it has already processed the input.
8 */
9 
10 #define _GNU_SOURCE
11 #include <sched.h>
12 #include <petscsys.h>        /*I  "petscsys.h"   I*/
13 #include <pthread.h>
14 #include <sys/sysinfo.h>
15 #include <unistd.h>
16 #if defined(PETSC_HAVE_STDLIB_H)
17 #include <stdlib.h>
18 #endif
19 #if defined(PETSC_HAVE_MALLOC_H)
20 #include <malloc.h>
21 #endif
22 #if defined(PETSC_HAVE_VALGRIND)
23 #include <valgrind/valgrind.h>
24 #endif
25 
/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool    PetscBeganMPI         = PETSC_FALSE;
PetscBool    PetscInitializeCalled = PETSC_FALSE;
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
PetscBool    PetscUseThreadPool    = PETSC_FALSE;  /* set by -use_thread_pool in PetscOptionsCheckInitial_Private() */
PetscBool    PetscThreadGo         = PETSC_TRUE;   /* NOTE(review): presumably cleared to make pool threads exit -- confirm in pool code */
PetscMPIInt  PetscGlobalRank = -1;
PetscMPIInt  PetscGlobalSize = -1;
PetscMPIInt  PetscMaxThreads = 2;  /* number of pool threads; overridden by -thread_max */
pthread_t*   PetscThreadPoint;     /* thread ids of the spawned pool workers */
pthread_barrier_t* BarrPoint;   //used by 'true' thread pool
PetscErrorCode ithreaderr = 0;  /* NOTE(review): looks like a sticky worker-thread error code -- confirm in pool code */
int*         pVal;              /* NOTE(review): presumably per-thread integer ids passed to the workers -- confirm */

#define CACHE_LINE_SIZE 64  //used by 'chain', 'main','tree' thread pools
int* ThreadCoreAffinity;  /* core index each pool thread is pinned to; filled from the -thread<i> options */

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  //used by 'chain','tree' thread pool

/* Job descriptor for the 'tree' pool: per-thread mutex/condition pairs plus
   the kernel (pfunc) and per-thread argument array (pdata) of the current job */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* Job descriptor for the 'main' pool; like sjob_tree but without the
   start/status bookkeeping fields */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* Job descriptor for the 'chain' pool; same layout as sjob_tree */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
/* Job descriptor for the 'true' pool: one shared mutex/condition plus a barrier */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  //used by 'true', 'chain','tree' thread pools
/* Raw byte arrays later carved into the per-thread mutex/cond/flag arrays above;
   NOTE(review): presumably padded with CACHE_LINE_SIZE to avoid false sharing -- confirm in pool code */
char* arrmutex; //used by 'chain','main','tree' thread pools
char* arrcond1; //used by 'chain','main','tree' thread pools
char* arrcond2; //used by 'chain','main','tree' thread pools
char* arrstart; //used by 'chain','main','tree' thread pools
char* arrready; //used by 'chain','main','tree' thread pools
100 
/* Function Pointers */
/* Dispatch table for the active thread-pool implementation; selected at runtime
   by the -pool option in PetscOptionsCheckInitial_Private(), NULL until then */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/

/* Helpers shared by the pool implementations (defined elsewhere) */
void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
136 
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
/* Fallback MPI datatypes, used when the MPI implementation lacks the
   standard C complex datatypes; built during PetscInitialize() */
MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
MPI_Datatype   MPI_C_COMPLEX;
#endif
PetscScalar    PETSC_i;
#else
PetscScalar    PETSC_i = 0.0;  /* imaginary unit; identically zero in real builds */
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;
#endif
/* Composite MPI datatypes (pairs of scalars/ints); 0 until registered at startup */
MPI_Datatype   MPIU_2SCALAR = 0;
MPI_Datatype   MPIU_2INT = 0;

/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;
162 
/*
       Function that is called to display all error messages
*/
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
#if defined(PETSC_HAVE_MATLAB_ENGINE)
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
  This is needed to turn on/off cusp synchronization */
PetscBool   synchronizeCUSP = PETSC_FALSE;  /* set by -cusp_synchronize (or implied by -log_summary) */

/* ------------------------------------------------------------------------------*/
/*
   Optional file where all PETSc output from various prints is saved
*/
FILE *petsc_history = PETSC_NULL;  /* opened by PetscOpenHistoryFile() when -history is given */
182 
183 #undef __FUNCT__
184 #define __FUNCT__ "PetscOpenHistoryFile"
185 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
186 {
187   PetscErrorCode ierr;
188   PetscMPIInt    rank,size;
189   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
190   char           version[256];
191 
192   PetscFunctionBegin;
193   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
194   if (!rank) {
195     char        arch[10];
196     int         err;
197     PetscViewer viewer;
198 
199     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
200     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
201     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
202     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
203     if (filename) {
204       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
205     } else {
206       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
207       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
208       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
209     }
210 
211     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
212     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
213     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
214     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
215     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
216     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
217     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
218     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
219     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
220     err = fflush(*fd);
221     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
222   }
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "PetscCloseHistoryFile"
228 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
229 {
230   PetscErrorCode ierr;
231   PetscMPIInt    rank;
232   char           date[64];
233   int            err;
234 
235   PetscFunctionBegin;
236   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
237   if (!rank) {
238     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
239     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
240     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
241     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
242     err = fflush(*fd);
243     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
244     err = fclose(*fd);
245     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
246   }
247   PetscFunctionReturn(0);
248 }
249 
/* ------------------------------------------------------------------------------*/

/*
   This is ugly and probably belongs somewhere else, but I want to
  be able to put a true MPI abort error handler with command line args.

    This is so MPI errors in the debugger will leave all the stack
  frames. The default MP_Abort() cleans up and exits thus providing no useful information
  in the debugger hence we call abort() instead of MPI_Abort().
*/

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/* MPI error handler: prints the MPI error code and calls abort() so the
   debugger keeps the full stack (see comment above); never returns */
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();
}
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
272 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
273 {
274   PetscErrorCode ierr;
275 
276   PetscFunctionBegin;
277   (*PetscErrorPrintf)("MPI error %d\n",*flag);
278   ierr = PetscAttachDebugger();
279   if (ierr) { /* hopeless so get out */
280     MPI_Abort(*comm,*flag);
281   }
282 }
283 
#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode  PetscEnd(void)
{
  PetscFunctionBegin;
  PetscFinalize();
  exit(0);   /* terminates the process; does not return */
  return 0;  /* unreachable; keeps compilers that require a return value quiet */
}
308 
PetscBool    PetscOptionsPublish = PETSC_FALSE;  /* set by -options_gui in PetscOptionsCheckInitial_Private() */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
extern PetscBool  petscsetmallocvisited;
static char       emacsmachinename[256];  /* machine name captured from -on_error_emacs */

/* Optional hooks so a higher-level package can print its own version/help
   message before PETSc's; installed via PetscSetHelpVersionFunctions() */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
316 
#undef __FUNCT__
#define __FUNCT__ "PetscSetHelpVersionFunctions"
/*@C
   PetscSetHelpVersionFunctions - Sets functions that print help and version information
   before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
   This routine enables a "higher-level" package that uses PETSc to print its messages first.

   Input Parameter:
+  help - the help function (may be PETSC_NULL)
-  version - the version function (may be PETSC_NULL)

   Level: developer

   Concepts: package help message

@*/
PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
{
  PetscFunctionBegin;
  PetscExternalHelpFunction    = help;     /* invoked before PETSc's own -help output */
  PetscExternalVersionFunction = version;  /* invoked before PETSc's own -v/-version output */
  PetscFunctionReturn(0);
}
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
343 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
344 {
345   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
346   MPI_Comm       comm = PETSC_COMM_WORLD;
347   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
348   PetscErrorCode ierr;
349   PetscReal      si;
350   int            i;
351   PetscMPIInt    rank;
352   char           version[256];
353 
354   PetscFunctionBegin;
355   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
356 
357   /*
358       Setup the memory management; support for tracing malloc() usage
359   */
360   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
361 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
362   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
363   if ((!flg2 || flg1) && !petscsetmallocvisited) {
364 #if defined(PETSC_HAVE_VALGRIND)
365     if (flg2 || !(RUNNING_ON_VALGRIND)) {
366       /* turn off default -malloc if valgrind is being used */
367 #endif
368       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
369 #if defined(PETSC_HAVE_VALGRIND)
370     }
371 #endif
372   }
373 #else
374   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
375   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
376   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
377 #endif
378   if (flg3) {
379     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
380   }
381   flg1 = PETSC_FALSE;
382   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
383   if (flg1) {
384     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
385     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
386   }
387 
388   flg1 = PETSC_FALSE;
389   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
390   if (!flg1) {
391     flg1 = PETSC_FALSE;
392     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
393   }
394   if (flg1) {
395     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
396   }
397 
398   /*
399       Set the display variable for graphics
400   */
401   ierr = PetscSetDisplay();CHKERRQ(ierr);
402 
403   /*
404       Determine whether user specified maximum number of threads
405    */
406   ierr = PetscOptionsHasName(PETSC_NULL,"-thread_max",&flg1);CHKERRQ(ierr);
407   if(flg1) {
408     ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
409   }
410 
411   /*
412       Determine whether to use thread pool
413    */
414   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
415   if(flg1) {
416     PetscUseThreadPool = PETSC_TRUE;
417     PetscInt N_CORES = get_nprocs();
418     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
419     char tstr[9];
420     char tbuf[2];
421     strcpy(tstr,"-thread");
422     for(i=0;i<PetscMaxThreads;i++) {
423       ThreadCoreAffinity[i] = i;  //default
424       sprintf(tbuf,"%d",i);
425       strcat(tstr,tbuf);
426       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
427       if(flg1) {
428         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
429         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; //check on the user
430       }
431       tstr[7] = '\0';
432     }
433     //get the thread pool type
434     PetscInt ipool = 0;
435     ierr = PetscOptionsGetInt(PETSC_NULL,"-pool",&ipool,PETSC_NULL);CHKERRQ(ierr);
436     switch(ipool) {
437     case 1:
438       PetscThreadFunc       = &PetscThreadFunc_Tree;
439       PetscThreadInitialize = &PetscThreadInitialize_Tree;
440       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
441       MainWait              = &MainWait_Tree;
442       MainJob               = &MainJob_Tree;
443       break;
444     case 2:
445       PetscThreadFunc       = &PetscThreadFunc_Main;
446       PetscThreadInitialize = &PetscThreadInitialize_Main;
447       PetscThreadFinalize   = &PetscThreadFinalize_Main;
448       MainWait              = &MainWait_Main;
449       MainJob               = &MainJob_Main;
450       break;
451     case 3:
452       PetscThreadFunc       = &PetscThreadFunc_Chain;
453       PetscThreadInitialize = &PetscThreadInitialize_Chain;
454       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
455       MainWait              = &MainWait_Chain;
456       MainJob               = &MainJob_Chain;
457       break;
458     default:
459       PetscThreadFunc       = &PetscThreadFunc_True;
460       PetscThreadInitialize = &PetscThreadInitialize_True;
461       PetscThreadFinalize   = &PetscThreadFinalize_True;
462       MainWait              = &MainWait_True;
463       MainJob               = &MainJob_True;
464       break;
465     }
466   }
467   PetscThreadInitialize(PetscMaxThreads);
468   /*
469       Print the PETSc version information
470   */
471   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
472   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
473   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
474   if (flg1 || flg2 || flg3){
475 
476     /*
477        Print "higher-level" package version message
478     */
479     if (PetscExternalVersionFunction) {
480       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
481     }
482 
483     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
484     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
485 ------------------------------\n");CHKERRQ(ierr);
486     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
487     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
488     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
489     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
490     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
491     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
492     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
493 ------------------------------\n");CHKERRQ(ierr);
494   }
495 
496   /*
497        Print "higher-level" package help message
498   */
499   if (flg3){
500     if (PetscExternalHelpFunction) {
501       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
502     }
503   }
504 
505   /*
506       Setup the error handling
507   */
508   flg1 = PETSC_FALSE;
509   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
510   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
511   flg1 = PETSC_FALSE;
512   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
513   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
514   flg1 = PETSC_FALSE;
515   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
516   if (flg1) {
517     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
518   }
519   flg1 = PETSC_FALSE;
520   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
521   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
522   flg1 = PETSC_FALSE;
523   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
524   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
525 
526   /*
527       Setup debugger information
528   */
529   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
530   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
531   if (flg1) {
532     MPI_Errhandler err_handler;
533 
534     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
535     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
536     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
537     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
538   }
539   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
540   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
541   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
542   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
543   if (flg1 || flg2) {
544     PetscMPIInt    size;
545     PetscInt       lsize,*nodes;
546     MPI_Errhandler err_handler;
547     /*
548        we have to make sure that all processors have opened
549        connections to all other processors, otherwise once the
550        debugger has stated it is likely to receive a SIGUSR1
551        and kill the program.
552     */
553     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
554     if (size > 2) {
555       PetscMPIInt dummy = 0;
556       MPI_Status  status;
557       for (i=0; i<size; i++) {
558         if (rank != i) {
559           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
560         }
561       }
562       for (i=0; i<size; i++) {
563         if (rank != i) {
564           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
565         }
566       }
567     }
568     /* check if this processor node should be in debugger */
569     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
570     lsize = size;
571     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
572     if (flag) {
573       for (i=0; i<lsize; i++) {
574         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
575       }
576     }
577     if (!flag) {
578       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
579       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
580       if (flg1) {
581         ierr = PetscAttachDebugger();CHKERRQ(ierr);
582       } else {
583         ierr = PetscStopForDebugger();CHKERRQ(ierr);
584       }
585       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
586       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
587     }
588     ierr = PetscFree(nodes);CHKERRQ(ierr);
589   }
590 
591   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
592   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
593 
594 #if defined(PETSC_USE_SOCKET_VIEWER)
595   /*
596     Activates new sockets for zope if needed
597   */
598   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
599   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
600   if (flgz){
601     int  sockfd;
602     char hostname[256];
603     char username[256];
604     int  remoteport = 9999;
605 
606     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
607     if (!hostname[0]){
608       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
609     }
610     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
611     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
612     PETSC_ZOPEFD = fdopen(sockfd, "w");
613     if (flgzout){
614       PETSC_STDOUT = PETSC_ZOPEFD;
615       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
616       fprintf(PETSC_STDOUT, "<<<start>>>");
617     } else {
618       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
619       fprintf(PETSC_ZOPEFD, "<<<start>>>");
620     }
621   }
622 #endif
623 #if defined(PETSC_USE_SERVER)
624   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
625   if (flgz){
626     PetscInt port = PETSC_DECIDE;
627     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
628     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
629   }
630 #endif
631 
632   /*
633         Setup profiling and logging
634   */
635 #if defined (PETSC_USE_INFO)
636   {
637     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
638     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
639     if (flg1 && logname[0]) {
640       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
641     } else if (flg1) {
642       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
643     }
644   }
645 #endif
646 #if defined(PETSC_USE_LOG)
647   mname[0] = 0;
648   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
649   if (flg1) {
650     if (mname[0]) {
651       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
652     } else {
653       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
654     }
655   }
656 #if defined(PETSC_HAVE_MPE)
657   flg1 = PETSC_FALSE;
658   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
659   if (flg1) PetscLogMPEBegin();
660 #endif
661   flg1 = PETSC_FALSE;
662   flg2 = PETSC_FALSE;
663   flg3 = PETSC_FALSE;
664   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
665   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
666   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
667   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
668   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
669   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
670 
671   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
672   if (flg1) {
673     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
674     FILE *file;
675     if (mname[0]) {
676       sprintf(name,"%s.%d",mname,rank);
677       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
678       file = fopen(fname,"w");
679       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
680     } else {
681       file = PETSC_STDOUT;
682     }
683     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
684   }
685 #endif
686 
687   /*
688       Setup building of stack frames for all function calls
689   */
690 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
691   ierr = PetscStackCreate();CHKERRQ(ierr);
692 #endif
693 
694   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
695 
696   /*
697        Print basic help message
698   */
699   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
700   if (flg1) {
701     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
702     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
703     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
704     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
705     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
706     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
707     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
708     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
709     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
710     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
711     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
712     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
713     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
714     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
715     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
716     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
717     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
718     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
719     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
720     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
721     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
722     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
723     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
724     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
725     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
726     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
727     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
728     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
729     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
730     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
731     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
732     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
733     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
734 #if defined(PETSC_USE_LOG)
735     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
736     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
737     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
738 #if defined(PETSC_HAVE_MPE)
739     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
740 #endif
741     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
742 #endif
743     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
747   }
748 
749   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
750   if (flg1) {
751     ierr = PetscSleep(si);CHKERRQ(ierr);
752   }
753 
754   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
755   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
756   if (f) {
757     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
758   }
759 
760 #if defined(PETSC_HAVE_CUSP)
761   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
762   if (flg3) flg1 = PETSC_TRUE;
763   else flg1 = PETSC_FALSE;
764   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
765   if (flg1) synchronizeCUSP = PETSC_TRUE;
766 #endif
767 
768   PetscFunctionReturn(0);
769 }
770 
771 /**** 'Tree' Thread Pool Functions ****/
/*
   PetscThreadFunc_Tree - worker routine for the 'tree' thread pool.

   Threads form an M-ary tree (Mary = 2): thread t's children are
   Mary*t+1 .. Mary*t+Mary.  Readiness propagates leaf-to-root through
   cond1array; work start propagates root-to-leaf through cond2array.
   Thread 0 (the root) is the only one that signals the 'main' thread
   via main_cond.

   arg: pointer to this thread's integer id (pVal[i], owned by the pool).
   Returns NULL when FuncFinish has cleared PetscThreadGo.
*/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;   // PETSC_TRUE if this thread is a leaf (no subordinates)
  cpu_set_t mset;

  // pin this thread to its assigned core
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  // a thread with no child in range [0,PetscMaxThreads) is a leaf ("PeeOn")
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    //check your subordinates, wait for them to be ready
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          //upon entry, atomically releases the lock and blocks
          //upon return, has the lock
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    //your subordinates are now ready
  }
  // NOTE: this thread's own mutex stays held from here across the
  // cond_wait loop below (cond_wait releases it while blocked)
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  //update your ready status
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    // root: whole tree is ready, tell 'main'
    job_tree.eJobStat = JobCompleted;
    //signal main
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    //tell your boss that you're ready to work
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  //the while loop needs to have an exit
  //the 'main' thread can terminate all the threads by performing a broadcast
  //and calling FuncFinish
  while(PetscThreadGo) {
    //need to check the condition to ensure we don't have to wait
    //waiting when you don't have to causes problems
    //also need to check the condition to ensure proper handling of spurious wakeups
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
        //upon entry, atomically releases the lock and blocks
        //upon return, has the lock
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
        *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      // root consumed the job submission posted by MainJob_Tree
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      //tell your subordinates it's time to get to work
      for(i=1; i<=Mary; i++) {
        SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    //do your job; NULL pdata means the same argument (NULL) for every thread
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;   // sticky global error flag, reported by MainJob
    }
    if(PetscThreadGo) {
      //reset job, get ready for more
      if(PeeOn==PETSC_FALSE) {
        //check your subordinates, waiting for them to be ready
        //arrThreadStarted guards against a subordinate that never woke up:
        //ready alone could be a leftover from the previous round
        for(i=1;i<=Mary;i++) {
          SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              //upon entry, atomically releases the lock and blocks
              //upon return, has the lock
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
        }
        //your subordinates are now ready
      }
      // re-acquire own mutex; it stays held into the next cond_wait loop
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_tree.eJobStat = JobCompleted; //root thread: last thread to complete, guaranteed!
        //root thread signals 'main'
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        //signal your boss before you go to sleep
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "PetscThreadInitialize_Tree"
894 void* PetscThreadInitialize_Tree(PetscInt N) {
895   PetscInt i,ierr;
896   int status;
897 
898   if(PetscUseThreadPool) {
899     size_t Val1 = (size_t)CACHE_LINE_SIZE;
900     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
901     arrmutex = (char*)memalign(Val1,Val2);
902     arrcond1 = (char*)memalign(Val1,Val2);
903     arrcond2 = (char*)memalign(Val1,Val2);
904     arrstart = (char*)memalign(Val1,Val2);
905     arrready = (char*)memalign(Val1,Val2);
906     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
907     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
908     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
909     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
910     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
911     //initialize job structure
912     for(i=0; i<PetscMaxThreads; i++) {
913       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
914       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
915       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
916       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
917       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
918     }
919     for(i=0; i<PetscMaxThreads; i++) {
920       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
921       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
922       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
923       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
924       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
925     }
926     job_tree.pfunc = NULL;
927     job_tree.pdata = (void**)malloc(N*sizeof(void*));
928     job_tree.startJob = PETSC_FALSE;
929     job_tree.eJobStat = JobInitiated;
930     pVal = (int*)malloc(N*sizeof(int));
931     //allocate memory in the heap for the thread structure
932     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
933     //create threads
934     for(i=0; i<N; i++) {
935       pVal[i] = i;
936       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
937       //error check
938     }
939   }
940   else {
941     //do nothing
942   }
943   return NULL;
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "PetscThreadFinalize_Tree"
948 PetscErrorCode PetscThreadFinalize_Tree() {
949   int i,ierr;
950   void* jstatus;
951 
952   PetscFunctionBegin;
953 
954   if(PetscUseThreadPool) {
955     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
956     //join the threads
957     for(i=0; i<PetscMaxThreads; i++) {
958       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
959       //do error checking
960     }
961     free(PetscThreadPoint);
962     free(arrmutex);
963     free(arrcond1);
964     free(arrcond2);
965     free(arrstart);
966     free(arrready);
967     free(job_tree.pdata);
968     free(pVal);
969   }
970   else {
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MainWait_Tree"
977 void MainWait_Tree() {
978   int ierr;
979   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
980   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
981     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
982   }
983   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MainJob_Tree"
988 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
989   int i,ierr;
990   PetscErrorCode ijoberr = 0;
991   if(PetscUseThreadPool) {
992     MainWait();
993     job_tree.pfunc = pFunc;
994     job_tree.pdata = data;
995     job_tree.startJob = PETSC_TRUE;
996     for(i=0; i<PetscMaxThreads; i++) {
997       *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
998     }
999     job_tree.eJobStat = JobInitiated;
1000     ierr = pthread_cond_signal(job_tree.cond2array[0]);
1001     if(pFunc!=FuncFinish) {
1002       MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
1003     }
1004   }
1005   else {
1006     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1007     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1008     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1009     free(apThread);
1010   }
1011   if(ithreaderr) {
1012     ijoberr = ithreaderr;
1013   }
1014   return ijoberr;
1015 }
1016 /****  ****/
1017 
1018 /**** 'Main' Thread Pool Functions ****/
/*
   PetscThreadFunc_Main - worker routine for the 'main' thread pool.

   Flat topology: every worker synchronizes directly with the 'main'
   thread.  A worker signals its cond1array entry when ready and sleeps on
   its cond2array entry until MainJob_Main wakes it.

   arg: pointer to this thread's integer id (pVal[i], owned by the pool).
   Returns NULL when FuncFinish has cleared PetscThreadGo.
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;

  // pin this thread to its assigned core
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  // mutex stays held from here across the cond_wait loop below
  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  //update your ready status
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  //tell the BOSS that you're ready to work before you go to sleep
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  //the while loop needs to have an exit
  //the 'main' thread can terminate all the threads by performing a broadcast
  //and calling FuncFinish
  while(PetscThreadGo) {
    //need to check the condition to ensure we don't have to wait
    //waiting when you don't have to causes problems
    //also need to check the condition to ensure proper handling of spurious wakeups
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
        //upon entry, atomically releases the lock and blocks
        //upon return, has the lock
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
        //the ready flag is cleared by MainJob_Main before it signals,
        //not here (unlike the tree/chain pools):
        //*(job_main.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    //do your job; NULL pdata means the same argument (NULL) for every thread
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;   // sticky global error flag, reported by MainJob
    }
    if(PetscThreadGo) {
      //reset job, get ready for more
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      //tell the BOSS that you're ready to work before you go to sleep
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "PetscThreadInitialize_Main"
1074 void* PetscThreadInitialize_Main(PetscInt N) {
1075   PetscInt i,ierr;
1076   int status;
1077 
1078   if(PetscUseThreadPool) {
1079     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1080     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1081     arrmutex = (char*)memalign(Val1,Val2);
1082     arrcond1 = (char*)memalign(Val1,Val2);
1083     arrcond2 = (char*)memalign(Val1,Val2);
1084     arrstart = (char*)memalign(Val1,Val2);
1085     arrready = (char*)memalign(Val1,Val2);
1086     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1087     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1088     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1089     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1090     //initialize job structure
1091     for(i=0; i<PetscMaxThreads; i++) {
1092       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1093       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1094       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1095       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1096     }
1097     for(i=0; i<PetscMaxThreads; i++) {
1098       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1099       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1100       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1101       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1102     }
1103     job_main.pfunc = NULL;
1104     job_main.pdata = (void**)malloc(N*sizeof(void*));
1105     pVal = (int*)malloc(N*sizeof(int));
1106     //allocate memory in the heap for the thread structure
1107     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1108     //create threads
1109     for(i=0; i<N; i++) {
1110       pVal[i] = i;
1111       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1112       //error check
1113     }
1114   }
1115   else {
1116   }
1117   return NULL;
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "PetscThreadFinalize_Main"
1122 PetscErrorCode PetscThreadFinalize_Main() {
1123   int i,ierr;
1124   void* jstatus;
1125 
1126   PetscFunctionBegin;
1127 
1128   if(PetscUseThreadPool) {
1129     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1130     //join the threads
1131     for(i=0; i<PetscMaxThreads; i++) {
1132       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1133       //do error checking
1134     }
1135     free(PetscThreadPoint);
1136     free(arrmutex);
1137     free(arrcond1);
1138     free(arrcond2);
1139     free(arrstart);
1140     free(arrready);
1141     free(job_main.pdata);
1142     free(pVal);
1143   }
1144   else {
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MainWait_Main"
1151 void MainWait_Main() {
1152   int i,ierr;
1153   for(i=0; i<PetscMaxThreads; i++) {
1154     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1155     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1156       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1157     }
1158     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1159   }
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MainJob_Main"
1164 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1165   int i,ierr;
1166   PetscErrorCode ijoberr = 0;
1167   if(PetscUseThreadPool) {
1168     MainWait(); //you know everyone is waiting to be signalled!
1169     job_main.pfunc = pFunc;
1170     job_main.pdata = data;
1171     for(i=0; i<PetscMaxThreads; i++) {
1172       *(job_main.arrThreadReady[i]) = PETSC_FALSE; //why do this?  suppose you get into MainWait first
1173     }
1174     //tell the threads to go to work
1175     for(i=0; i<PetscMaxThreads; i++) {
1176       ierr = pthread_cond_signal(job_main.cond2array[i]);
1177     }
1178     if(pFunc!=FuncFinish) {
1179       MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
1180     }
1181   }
1182   else {
1183     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1184     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1185     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1186     free(apThread);
1187   }
1188   if(ithreaderr) {
1189     ijoberr = ithreaderr;
1190   }
1191   return ijoberr;
1192 }
1193 /****  ****/
1194 
1195 /**** Chain Thread Functions ****/
/*
   PetscThreadFunc_Chain - worker routine for the 'chain' thread pool.

   Degenerate case of the tree: each thread has exactly one subordinate,
   thread ThreadId+1.  Readiness propagates up the chain via cond1array;
   work start propagates down via cond2array.  Thread 0 alone talks to the
   'main' thread through main_cond.

   arg: pointer to this thread's integer id (pVal[i], owned by the pool).
   Returns NULL when FuncFinish has cleared PetscThreadGo.
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;   // PETSC_TRUE for the last thread (no subordinate)
  cpu_set_t mset;

  // pin this thread to its assigned core
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    //check your subordinate, wait for him to be ready
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      //upon entry, atomically releases the lock and blocks
      //upon return, has the lock
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    //your subordinate is now ready
  }
  // NOTE: this thread's own mutex stays held from here across the
  // cond_wait loop below (cond_wait releases it while blocked)
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  //update your ready status
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    // head of the chain: the whole chain is ready, tell 'main'
    job_chain.eJobStat = JobCompleted;
    //signal main
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    //tell your boss that you're ready to work
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  //the while loop needs to have an exit
  //the 'main' thread can terminate all the threads by performing a broadcast
  //and calling FuncFinish
  while(PetscThreadGo) {
    //need to check the condition to ensure we don't have to wait
    //waiting when you don't have to causes problems
    //also need to check the condition to ensure proper handling of spurious wakeups
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
        //upon entry, atomically releases the lock and blocks
        //upon return, has the lock
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
        *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      // head consumed the job submission posted by MainJob_Chain
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      //tell your subworker it's time to get to work
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    //do your job; NULL pdata means the same argument (NULL) for every thread
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;   // sticky global error flag, reported by MainJob
    }
    if(PetscThreadGo) {
      //reset job, get ready for more
      if(PeeOn==PETSC_FALSE) {
        //check your subordinate, wait for him to be ready
        //arrThreadStarted guards against a subordinate that never woke up:
        //ready alone could be a leftover from the previous round
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          //upon entry, atomically releases the lock and blocks
          //upon return, has the lock
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        //your subordinate is now ready
      }
      // re-acquire own mutex; it stays held into the next cond_wait loop
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_chain.eJobStat = JobCompleted; //foreman: last thread to complete, guaranteed!
        //root thread (foreman) signals 'main'
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        //signal your boss before you go to sleep
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "PetscThreadInitialize_Chain"
1304 void* PetscThreadInitialize_Chain(PetscInt N) {
1305   PetscInt i,ierr;
1306   int status;
1307 
1308   if(PetscUseThreadPool) {
1309     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1310     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1311     arrmutex = (char*)memalign(Val1,Val2);
1312     arrcond1 = (char*)memalign(Val1,Val2);
1313     arrcond2 = (char*)memalign(Val1,Val2);
1314     arrstart = (char*)memalign(Val1,Val2);
1315     arrready = (char*)memalign(Val1,Val2);
1316     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1317     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1318     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1319     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1320     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1321     //initialize job structure
1322     for(i=0; i<PetscMaxThreads; i++) {
1323       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1324       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1325       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1326       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1327       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1328     }
1329     for(i=0; i<PetscMaxThreads; i++) {
1330       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1331       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1332       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1333       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1334       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1335     }
1336     job_chain.pfunc = NULL;
1337     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1338     job_chain.startJob = PETSC_FALSE;
1339     job_chain.eJobStat = JobInitiated;
1340     pVal = (int*)malloc(N*sizeof(int));
1341     //allocate memory in the heap for the thread structure
1342     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1343     //create threads
1344     for(i=0; i<N; i++) {
1345       pVal[i] = i;
1346       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1347       //error check
1348     }
1349   }
1350   else {
1351   }
1352   return NULL;
1353 }
1354 
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "PetscThreadFinalize_Chain"
1358 PetscErrorCode PetscThreadFinalize_Chain() {
1359   int i,ierr;
1360   void* jstatus;
1361 
1362   PetscFunctionBegin;
1363 
1364   if(PetscUseThreadPool) {
1365     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1366     //join the threads
1367     for(i=0; i<PetscMaxThreads; i++) {
1368       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1369       //do error checking
1370     }
1371     free(PetscThreadPoint);
1372     free(arrmutex);
1373     free(arrcond1);
1374     free(arrcond2);
1375     free(arrstart);
1376     free(arrready);
1377     free(job_chain.pdata);
1378     free(pVal);
1379   }
1380   else {
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MainWait_Chain"
1387 void MainWait_Chain() {
1388   int ierr;
1389   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1390   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1391     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1392   }
1393   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1394 }
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MainJob_Chain"
1398 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1399   int i,ierr;
1400   PetscErrorCode ijoberr = 0;
1401   if(PetscUseThreadPool) {
1402     MainWait();
1403     job_chain.pfunc = pFunc;
1404     job_chain.pdata = data;
1405     job_chain.startJob = PETSC_TRUE;
1406     for(i=0; i<PetscMaxThreads; i++) {
1407       *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1408     }
1409     job_chain.eJobStat = JobInitiated;
1410     ierr = pthread_cond_signal(job_chain.cond2array[0]);
1411     if(pFunc!=FuncFinish) {
1412       MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
1413     }
1414   }
1415   else {
1416     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1417     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1418     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1419     free(apThread);
1420   }
1421   if(ithreaderr) {
1422     ijoberr = ithreaderr;
1423   }
1424   return ijoberr;
1425 }
1426 /****  ****/
1427 
1428 /**** True Thread Functions ****/
/*
   PetscThreadFunc_True - worker routine for the 'true' thread pool.

   All workers share one mutex/condition pair (job_true.mutex/cond); a job
   for n threads is claimed first-come-first-served, and a barrier sized
   for n (BarrPoint[n]) holds the participants together until all finish.
   The last thread to become ready signals 'main' via main_cond.

   arg: pointer to this thread's integer id (pVal[i], owned by the pool).
   Returns NULL when FuncFinish has cleared PetscThreadGo.
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;

  // pin this thread to its assigned core
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  // mutex stays held from here across the cond_wait loop below
  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    // last thread to arrive tells 'main' the whole pool is up
    ierr = pthread_cond_signal(&main_cond);
  }
  //the while loop needs to have an exit
  //the 'main' thread can terminate all the threads by performing a broadcast
  //and calling FuncFinish
  while(PetscThreadGo) {
    //need to check the condition to ensure we don't have to wait
    //waiting when you don't have to causes problems
    //also need to wait if another thread sneaks in and messes with the predicate
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      //upon entry, atomically releases the lock and blocks
      //upon return, has the lock
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    //claim one slot of the current job (still holding the mutex)
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    //iVal is this thread's rank within the job, used to index pdata
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    //NOTE(review): pdata/pfunc are read below without the mutex held
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1;   // sticky global error flag, reported by MainJob
    }
    //the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
    //what happens if a thread finishes before they all start? BAD!
    //what happens if a thread finishes before any else start? BAD!
    pthread_barrier_wait(job_true.pbarr); //ensures all threads are finished
    //reset job
    if(PetscThreadGo) {
      // mutex re-acquired here and held into the next cond_wait loop
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
        //signal the 'main' thread that the job is done! (only done once)
        ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "PetscThreadInitialize_True"
1491 void* PetscThreadInitialize_True(PetscInt N) {
1492   PetscInt i;
1493   int status;
1494 
1495   if(PetscUseThreadPool) {
1496     pVal = (int*)malloc(N*sizeof(int));
1497     //allocate memory in the heap for the thread structure
1498     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1499     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); //BarrPoint[0] makes no sense, don't use it!
1500     job_true.pdata = (void**)malloc(N*sizeof(void*));
1501     for(i=0; i<N; i++) {
1502       pVal[i] = i;
1503       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1504       //error check to ensure proper thread creation
1505       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1506       //error check
1507     }
1508   }
1509   else {
1510   }
1511   return NULL;
1512 }
1513 
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "PetscThreadFinalize_True"
1517 PetscErrorCode PetscThreadFinalize_True() {
1518   int i,ierr;
1519   void* jstatus;
1520 
1521   PetscFunctionBegin;
1522 
1523   if(PetscUseThreadPool) {
1524     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1525     //join the threads
1526     for(i=0; i<PetscMaxThreads; i++) {
1527       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1528       //do error checking
1529     }
1530     free(BarrPoint);
1531     free(PetscThreadPoint);
1532   }
1533   else {
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MainWait_True"
1540 void MainWait_True() {
1541   int ierr;
1542   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1543     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1544   }
1545   ierr = pthread_mutex_unlock(&job_true.mutex);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MainJob_True"
1550 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1551   int ierr;
1552   PetscErrorCode ijoberr = 0;
1553   if(PetscUseThreadPool) {
1554     MainWait();
1555     job_true.pfunc = pFunc;
1556     job_true.pdata = data;
1557     job_true.pbarr = &BarrPoint[n];
1558     job_true.iNumJobThreads = n;
1559     job_true.startJob = PETSC_TRUE;
1560     ierr = pthread_cond_broadcast(&job_true.cond);
1561     if(pFunc!=FuncFinish) {
1562       MainWait(); //why wait after? guarantees that job gets done
1563     }
1564   }
1565   else {
1566     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1567     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1568     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1569     free(apThread);
1570   }
1571   if(ithreaderr) {
1572     ijoberr = ithreaderr;
1573   }
1574   return ijoberr;
1575 }
1576 /****  ****/
1577 
1578 void* FuncFinish(void* arg) {
1579   PetscThreadGo = PETSC_FALSE;
1580   return(0);
1581 }
1582