xref: /petsc/src/sys/objects/init.c (revision 6a4bb4b043f38746f1d570dcb66790e05f8eeea6)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #include <petscsys.h>        /*I  "petscsys.h"   I*/
10 
11 #define PETSC_PTHREADCLASSES_DEBUG 0
12 
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16 #if defined(PETSC_HAVE_SCHED_H)
17 #ifndef __USE_GNU
18 #define __USE_GNU
19 #endif
20 #include <sched.h>
21 #endif
22 #if defined(PETSC_HAVE_PTHREAD_H)
23 #include <pthread.h>
24 #endif
25 
26 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
27 #include <sys/sysinfo.h>
28 #endif
29 #if defined(PETSC_HAVE_UNISTD_H)
30 #include <unistd.h>
31 #endif
32 #if defined(PETSC_HAVE_STDLIB_H)
33 #include <stdlib.h>
34 #endif
35 #if defined(PETSC_HAVE_MALLOC_H)
36 #include <malloc.h>
37 #endif
38 #if defined(PETSC_HAVE_VALGRIND)
39 #include <valgrind/valgrind.h>
40 #endif
41 
/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool    PetscBeganMPI         = PETSC_FALSE;
/*
   Both flags start out PETSC_FALSE.
   NOTE(review): presumably set by PetscInitialize()/PetscFinalize(), which are not
   visible in this part of the file - confirm.
*/
PetscBool    PetscInitializeCalled = PETSC_FALSE;
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
/* Set to PETSC_TRUE by PetscOptionsCheckInitial_Private() when -use_thread_pool is given */
PetscBool    PetscUseThreadPool    = PETSC_FALSE;
/* NOTE(review): presumably the keep-running flag read by the pool worker threads - confirm
   against the thread pool implementations */
PetscBool    PetscThreadGo         = PETSC_TRUE;
/* Start at -1; NOTE(review): presumably the rank/size in PETSC_COMM_WORLD, filled in during
   initialization - confirm */
PetscMPIInt  PetscGlobalRank = -1;
PetscMPIInt  PetscGlobalSize = -1;
54 
#if defined(PETSC_HAVE_PTHREADCLASSES)
/* Number of threads; defaults to 2 and may be overridden with -thread_max
   (see PetscOptionsCheckInitial_Private()) */
PetscMPIInt  PetscMaxThreads = 2;
/* NOTE(review): presumably the array of worker thread handles - allocated outside this section, confirm */
pthread_t*   PetscThreadPoint;
/*
   NOTE(review): the macro is defined unconditionally here, so the barrier based 'true'
   thread pool code below is always compiled; confirm this is intended and that the
   configure generated headers do not also define it (which would be a redefinition).
*/
#define PETSC_HAVE_PTHREAD_BARRIER
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
/* NOTE(review): presumably records an error code raised inside a worker thread - confirm */
PetscErrorCode ithreaderr = 0;
/* NOTE(review): purpose not visible in this section; presumably per-thread integer arguments - confirm */
int*         pVal;

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
/* Core number assigned to each thread: allocated in PetscOptionsCheckInitial_Private(),
   where entry i defaults to i and may be overridden with -thread<i> */
int* ThreadCoreAffinity;
67 
/* Job progress states shared by the job descriptors below */
typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */

/*
   Job descriptor for the 'tree' thread pool; the single instance is job_tree.
   NOTE(review): the field descriptions are inferred from the names only - the code that
   fills and reads them is not in this section; confirm against the *_Tree routines.
*/
typedef struct {
  pthread_mutex_t** mutexarray;   /* array of pointers to mutexes (presumably one per thread) */
  pthread_cond_t**  cond1array;   /* first array of pointers to condition variables */
  pthread_cond_t** cond2array;    /* second array of pointers to condition variables */
  void* (*pfunc)(void*);          /* function to be run as the job */
  void** pdata;                   /* array of argument pointers handed to pfunc */
  PetscBool startJob;
  estat eJobStat;                 /* current job state, one of the estat values */
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/*
   Job descriptor for the 'main' thread pool; the single instance is job_main.
   It has the same synchronization arrays as sjob_tree/sjob_chain but no startJob,
   eJobStat or arrThreadStarted members.
   NOTE(review): the field descriptions are inferred from the names only; confirm against
   the *_Main routines.
*/
typedef struct {
  pthread_mutex_t** mutexarray;   /* array of pointers to mutexes (presumably one per thread) */
  pthread_cond_t**  cond1array;   /* first array of pointers to condition variables */
  pthread_cond_t** cond2array;    /* second array of pointers to condition variables */
  void* (*pfunc)(void*);          /* function to be run as the job */
  void** pdata;                   /* array of argument pointers handed to pfunc */
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/*
   Job descriptor for the 'chain' thread pool; the single instance is job_chain.
   The member list is identical to that of sjob_tree.
   NOTE(review): the field descriptions are inferred from the names only; confirm against
   the *_Chain routines.
*/
typedef struct {
  pthread_mutex_t** mutexarray;   /* array of pointers to mutexes (presumably one per thread) */
  pthread_cond_t**  cond1array;   /* first array of pointers to condition variables */
  pthread_cond_t** cond2array;    /* second array of pointers to condition variables */
  void* (*pfunc)(void*);          /* function to be run as the job */
  void** pdata;                   /* array of argument pointers handed to pfunc */
  PetscBool startJob;
  estat eJobStat;                 /* current job state, one of the estat values */
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/*
   Job descriptor for the barrier based 'true' thread pool; the single instance is job_true.
   Unlike the other descriptors it holds one mutex and one condition variable by value.
   NOTE(review): the field descriptions are inferred from the names only; confirm against
   the *_True routines.
*/
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);          /* function to be run as the job */
  void** pdata;                   /* array of argument pointers handed to pfunc */
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
/* Positional initializer: the values follow the member order above (statically initialized
   mutex and condition variable, NULL pointers, zero counters, startJob = PETSC_FALSE) */
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif
116 
/* Statically initialized condition variable; NOTE(review): presumably what the main thread
   waits on - confirm against the MainWait_* routines */
pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
/*
   Raw byte buffers shared by the pool implementations.
   NOTE(review): presumably the backing storage for the mutexarray/cond1array/cond2array/
   arrThreadStarted/arrThreadReady entries of the job descriptors - allocation is not in
   this section, confirm.
*/
char* arrmutex; /* used by 'chain','main','tree' thread pools */
char* arrcond1; /* used by 'chain','main','tree' thread pools */
char* arrcond2; /* used by 'chain','main','tree' thread pools */
char* arrstart; /* used by 'chain','main','tree' thread pools */
char* arrready; /* used by 'chain','main','tree' thread pools */
123 
/* Function Pointers */
/*
   Dispatch table for the selected thread pool. All entries start out NULL;
   PetscOptionsCheckInitial_Private() points them at the _Tree, _Main, _Chain or _True
   implementations according to -use_thread_pool. When that option is absent only
   MainJob is set (to MainJob_Spawn) and the others remain NULL.
*/
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/* The implementations declared below are defined outside this section of the file */
/**** Tree Thread Pool Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Thread Pool Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Thread Pool Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Thread Pool Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** NO Thread Pool Function  ****/
/* Installed as MainJob when -use_thread_pool is not given */
PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/
/* NOTE(review): helpers for running/stopping threads; their behavior is defined elsewhere - confirm usage there */
void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
160 #endif
161 
/*
   Global scalar constant and MPI datatype handles.
   With PETSC_USE_COMPLEX, PETSC_i is left without an initializer here; otherwise it is 0.0.
   NOTE(review): presumably the complex build assigns the imaginary unit and creates the
   MPI datatypes during initialization, outside this section - confirm.
*/
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
/* Only defined here when MPI does not already provide MPI_C_DOUBLE_COMPLEX */
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
MPI_Datatype   MPIU_C_DOUBLE_COMPLEX;
MPI_Datatype   MPIU_C_COMPLEX;
#endif
PetscScalar    PETSC_i;
#else
PetscScalar    PETSC_i = 0.0;
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;
#endif
/* Start as 0; NOTE(review): presumably datatypes for pairs of PetscScalar / PetscInt created
   during initialization - confirm */
MPI_Datatype   MPIU_2SCALAR = 0;
MPI_Datatype   MPIU_2INT = 0;
179 
/*
     These are needed by petscbt.h

     NOTE(review): presumably scratch variables referenced by the bit-array macros in
     petscbt.h; their exact use is not visible in this file - confirm there.
*/
#include <petscbt.h>
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;
187 
/*
       Function that is called to display all error messages

       These are replaceable function pointers, each initialized to a default
       implementation: PetscErrorPrintf is called by the MPI error handlers in this file,
       and PetscHelpPrintf by PetscOptionsCheckInitial_Private() for the version and help
       output. PetscVFPrintf defaults to PetscVFPrintf_Matlab when PETSc is built with the
       MATLAB engine and to PetscVFPrintfDefault otherwise.
*/
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
#if defined(PETSC_HAVE_MATLAB_ENGINE)
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
  This is needed to turn on/off cusp synchronization */
PetscBool   synchronizeCUSP = PETSC_FALSE;
201 
/* ------------------------------------------------------------------------------*/
/*
   Optional file where all PETSc output from various prints is saved

   Stays PETSC_NULL unless the -history option is given, in which case
   PetscOptionsCheckInitial_Private() passes its address to PetscOpenHistoryFile()
   (only in builds with PETSC_USE_LOG).
*/
FILE *petsc_history = PETSC_NULL;
207 
208 #undef __FUNCT__
209 #define __FUNCT__ "PetscOpenHistoryFile"
210 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
211 {
212   PetscErrorCode ierr;
213   PetscMPIInt    rank,size;
214   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
215   char           version[256];
216 
217   PetscFunctionBegin;
218   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
219   if (!rank) {
220     char        arch[10];
221     int         err;
222     PetscViewer viewer;
223 
224     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
225     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
226     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
227     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
228     if (filename) {
229       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
230     } else {
231       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
232       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
233       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
234     }
235 
236     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
237     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
238     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
239     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
240     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
241     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
242     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
243     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
244     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
245     err = fflush(*fd);
246     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "PetscCloseHistoryFile"
253 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
254 {
255   PetscErrorCode ierr;
256   PetscMPIInt    rank;
257   char           date[64];
258   int            err;
259 
260   PetscFunctionBegin;
261   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
262   if (!rank) {
263     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
264     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
265     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
266     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
267     err = fflush(*fd);
268     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
269     err = fclose(*fd);
270     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
271   }
272   PetscFunctionReturn(0);
273 }
274 
275 /* ------------------------------------------------------------------------------*/
276 
277 /*
278    This is ugly and probably belongs somewhere else, but I want to
279   be able to put a true MPI abort error handler with command line args.
280 
281     This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits, thus providing no useful information
  in the debugger; hence we call abort() instead of MPI_Abort().
284 */
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "Petsc_MPI_AbortOnError"
288 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
289 {
290   PetscFunctionBegin;
291   (*PetscErrorPrintf)("MPI error %d\n",*flag);
292   abort();
293 }
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
297 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
298 {
299   PetscErrorCode ierr;
300 
301   PetscFunctionBegin;
302   (*PetscErrorPrintf)("MPI error %d\n",*flag);
303   ierr = PetscAttachDebugger();
304   if (ierr) { /* hopeless so get out */
305     MPI_Abort(*comm,*flag);
306   }
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "PetscEnd"
311 /*@C
312    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
313      wishes a clean exit somewhere deep in the program.
314 
315    Collective on PETSC_COMM_WORLD
316 
317    Options Database Keys are the same as for PetscFinalize()
318 
319    Level: advanced
320 
321    Note:
322    See PetscInitialize() for more general runtime options.
323 
324 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
325 @*/
326 PetscErrorCode  PetscEnd(void)
327 {
328   PetscFunctionBegin;
329   PetscFinalize();
330   exit(0);
331   return 0;
332 }
333 
/* Set from the -options_gui option in PetscOptionsCheckInitial_Private() */
PetscBool    PetscOptionsPublish = PETSC_FALSE;
/* Defined elsewhere; called from PetscOptionsCheckInitial_Private() to switch on the tracing malloc */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
/* Defined elsewhere; when true, the debug build skips its default call to PetscSetUseTrMalloc_Private() */
extern PetscBool  petscsetmallocvisited;
/* Receives the value of -on_error_emacs and is handed to PetscEmacsClientErrorHandler as its
   context, which is why it has static storage */
static char       emacsmachinename[256];

/* Optional hooks of a "higher-level" package, installed with PetscSetHelpVersionFunctions().
   When non-null the version hook is called for -v, -version or -help, and the help hook for -help */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
341 
342 #undef __FUNCT__
343 #define __FUNCT__ "PetscSetHelpVersionFunctions"
344 /*@C
345    PetscSetHelpVersionFunctions - Sets functions that print help and version information
346    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
347    This routine enables a "higher-level" package that uses PETSc to print its messages first.
348 
349    Input Parameter:
350 +  help - the help function (may be PETSC_NULL)
351 -  version - the version function (may be PETSC_NULL)
352 
353    Level: developer
354 
355    Concepts: package help message
356 
357 @*/
358 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
359 {
360   PetscFunctionBegin;
361   PetscExternalHelpFunction    = help;
362   PetscExternalVersionFunction = version;
363   PetscFunctionReturn(0);
364 }
365 
366 #undef __FUNCT__
367 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
368 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
369 {
370   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
371   MPI_Comm       comm = PETSC_COMM_WORLD;
372   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
373   PetscErrorCode ierr;
374   PetscReal      si;
375   int            i;
376   PetscMPIInt    rank;
377   char           version[256];
378 
379   PetscFunctionBegin;
380   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
381 
382   /*
383       Setup the memory management; support for tracing malloc() usage
384   */
385   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
386 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
387   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
388   if ((!flg2 || flg1) && !petscsetmallocvisited) {
389 #if defined(PETSC_HAVE_VALGRIND)
390     if (flg2 || !(RUNNING_ON_VALGRIND)) {
391       /* turn off default -malloc if valgrind is being used */
392 #endif
393       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
394 #if defined(PETSC_HAVE_VALGRIND)
395     }
396 #endif
397   }
398 #else
399   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
400   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
401   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
402 #endif
403   if (flg3) {
404     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
405   }
406   flg1 = PETSC_FALSE;
407   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
408   if (flg1) {
409     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
410     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
411   }
412 
413   flg1 = PETSC_FALSE;
414   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
415   if (!flg1) {
416     flg1 = PETSC_FALSE;
417     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
418   }
419   if (flg1) {
420     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
421   }
422 
423   /*
424       Set the display variable for graphics
425   */
426   ierr = PetscSetDisplay();CHKERRQ(ierr);
427 
428 
429   /*
430       Print the PETSc version information
431   */
432   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
433   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
434   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
435   if (flg1 || flg2 || flg3){
436 
437     /*
438        Print "higher-level" package version message
439     */
440     if (PetscExternalVersionFunction) {
441       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
442     }
443 
444     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
445     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
446 ------------------------------\n");CHKERRQ(ierr);
447     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
448     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
449     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
450     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
451     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
452     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
453     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
454 ------------------------------\n");CHKERRQ(ierr);
455   }
456 
457   /*
458        Print "higher-level" package help message
459   */
460   if (flg3){
461     if (PetscExternalHelpFunction) {
462       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
463     }
464   }
465 
466   /*
467       Setup the error handling
468   */
469   flg1 = PETSC_FALSE;
470   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
471   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
472   flg1 = PETSC_FALSE;
473   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
474   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
475   flg1 = PETSC_FALSE;
476   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
477   if (flg1) {
478     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
479   }
480   flg1 = PETSC_FALSE;
481   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
482   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
483   flg1 = PETSC_FALSE;
484   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
485   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
486 
487   /*
488       Setup debugger information
489   */
490   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
491   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
492   if (flg1) {
493     MPI_Errhandler err_handler;
494 
495     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
496     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
497     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
498     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
499   }
500   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
501   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
502   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
503   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
504   if (flg1 || flg2) {
505     PetscMPIInt    size;
506     PetscInt       lsize,*nodes;
507     MPI_Errhandler err_handler;
508     /*
509        we have to make sure that all processors have opened
510        connections to all other processors, otherwise once the
       debugger has started it is likely to receive a SIGUSR1
512        and kill the program.
513     */
514     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
515     if (size > 2) {
516       PetscMPIInt dummy = 0;
517       MPI_Status  status;
518       for (i=0; i<size; i++) {
519         if (rank != i) {
520           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
521         }
522       }
523       for (i=0; i<size; i++) {
524         if (rank != i) {
525           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
526         }
527       }
528     }
529     /* check if this processor node should be in debugger */
530     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
531     lsize = size;
532     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
533     if (flag) {
534       for (i=0; i<lsize; i++) {
535         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
536       }
537     }
538     if (!flag) {
539       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
540       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
541       if (flg1) {
542         ierr = PetscAttachDebugger();CHKERRQ(ierr);
543       } else {
544         ierr = PetscStopForDebugger();CHKERRQ(ierr);
545       }
546       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
547       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
548     }
549     ierr = PetscFree(nodes);CHKERRQ(ierr);
550   }
551 
552   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
553   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
554 
555 #if defined(PETSC_USE_SOCKET_VIEWER)
556   /*
557     Activates new sockets for zope if needed
558   */
559   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
560   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
561   if (flgz){
562     int  sockfd;
563     char hostname[256];
564     char username[256];
565     int  remoteport = 9999;
566 
567     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
568     if (!hostname[0]){
569       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
570     }
571     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
572     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
573     PETSC_ZOPEFD = fdopen(sockfd, "w");
574     if (flgzout){
575       PETSC_STDOUT = PETSC_ZOPEFD;
576       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
577       fprintf(PETSC_STDOUT, "<<<start>>>");
578     } else {
579       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
580       fprintf(PETSC_ZOPEFD, "<<<start>>>");
581     }
582   }
583 #endif
584 #if defined(PETSC_USE_SERVER)
585   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
586   if (flgz){
587     PetscInt port = PETSC_DECIDE;
588     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
589     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
590   }
591 #endif
592 
593   /*
594         Setup profiling and logging
595   */
596 #if defined (PETSC_USE_INFO)
597   {
598     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
599     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
600     if (flg1 && logname[0]) {
601       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
602     } else if (flg1) {
603       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
604     }
605   }
606 #endif
607 #if defined(PETSC_USE_LOG)
608   mname[0] = 0;
609   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
610   if (flg1) {
611     if (mname[0]) {
612       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
613     } else {
614       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
615     }
616   }
617 #if defined(PETSC_HAVE_MPE)
618   flg1 = PETSC_FALSE;
619   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
620   if (flg1) PetscLogMPEBegin();
621 #endif
622   flg1 = PETSC_FALSE;
623   flg2 = PETSC_FALSE;
624   flg3 = PETSC_FALSE;
625   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
626   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
627   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
628   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
629   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
630   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
631 
632   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
633   if (flg1) {
634     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
635     FILE *file;
636     if (mname[0]) {
637       sprintf(name,"%s.%d",mname,rank);
638       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
639       file = fopen(fname,"w");
640       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
641     } else {
642       file = PETSC_STDOUT;
643     }
644     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
645   }
646 #endif
647 
648   /*
649       Setup building of stack frames for all function calls
650   */
651 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
652   ierr = PetscStackCreate();CHKERRQ(ierr);
653 #endif
654 
655   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
656 
657 #if defined(PETSC_HAVE_PTHREADCLASSES)
658   /*
659       Determine whether user specified maximum number of threads
660    */
661   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
662 
663   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
664   if(flg1) {
665     cpu_set_t mset;
666     int icorr,ncorr = get_nprocs();
667     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
668     CPU_ZERO(&mset);
669     CPU_SET(icorr%ncorr,&mset);
670     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
671   }
672 
673   PetscInt N_CORES = get_nprocs();
674   ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
675   char tstr[9];
676   char tbuf[2];
677   strcpy(tstr,"-thread");
678   for(i=0;i<PetscMaxThreads;i++) {
679     ThreadCoreAffinity[i] = i;
680     sprintf(tbuf,"%d",i);
681     strcat(tstr,tbuf);
682     ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
683     if(flg1) {
684       ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
685       ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
686     }
687     tstr[7] = '\0';
688   }
689 
690   /*
691       Determine whether to use thread pool
692    */
693   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
694   if (flg1) {
695     PetscUseThreadPool = PETSC_TRUE;
696     /* get the thread pool type */
697     PetscInt ipool = 0;
698     const char *choices[4] = {"true","tree","main","chain"};
699 
700     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
701     switch(ipool) {
702     case 1:
703       PetscThreadFunc       = &PetscThreadFunc_Tree;
704       PetscThreadInitialize = &PetscThreadInitialize_Tree;
705       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
706       MainWait              = &MainWait_Tree;
707       MainJob               = &MainJob_Tree;
708       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
709       break;
710     case 2:
711       PetscThreadFunc       = &PetscThreadFunc_Main;
712       PetscThreadInitialize = &PetscThreadInitialize_Main;
713       PetscThreadFinalize   = &PetscThreadFinalize_Main;
714       MainWait              = &MainWait_Main;
715       MainJob               = &MainJob_Main;
716       PetscInfo(PETSC_NULL,"Using main thread pool\n");
717       break;
718 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
719     case 3:
720 #else
721     default:
722 #endif
723       PetscThreadFunc       = &PetscThreadFunc_Chain;
724       PetscThreadInitialize = &PetscThreadInitialize_Chain;
725       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
726       MainWait              = &MainWait_Chain;
727       MainJob               = &MainJob_Chain;
728       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
729       break;
730 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
731     default:
732       PetscThreadFunc       = &PetscThreadFunc_True;
733       PetscThreadInitialize = &PetscThreadInitialize_True;
734       PetscThreadFinalize   = &PetscThreadFinalize_True;
735       MainWait              = &MainWait_True;
736       MainJob               = &MainJob_True;
737       PetscInfo(PETSC_NULL,"Using true thread pool\n");
738       break;
739 #endif
740     }
741     PetscThreadInitialize(PetscMaxThreads);
742   } else {
743     /* need to define these in the case on 'no threads' or 'thread create/destroy' */
744     /* could take any of the above versions */
745     MainJob               = &MainJob_Spawn;
746   }
747 #endif
748   /*
749        Print basic help message
750   */
751   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
752   if (flg1) {
753     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
777     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
778     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
779     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
780     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
781     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
782     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
783     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
784     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
785     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
786     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
787 #if defined(PETSC_USE_LOG)
788     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
789     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
790     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
791 #if defined(PETSC_HAVE_MPE)
792     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
793 #endif
794     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
795 #endif
796     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
797     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
798     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
799     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
800   }
801 
802   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
803   if (flg1) {
804     ierr = PetscSleep(si);CHKERRQ(ierr);
805   }
806 
807   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
808   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
809   if (f) {
810     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
811   }
812 
813 #if defined(PETSC_HAVE_CUSP)
814   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
815   if (flg3) flg1 = PETSC_TRUE;
816   else flg1 = PETSC_FALSE;
817   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
818   if (flg1) synchronizeCUSP = PETSC_TRUE;
819 #endif
820 
821   PetscFunctionReturn(0);
822 }
823 
824 #if defined(PETSC_HAVE_PTHREADCLASSES)
825 
826 /**** 'Tree' Thread Pool Functions ****/
827 void* PetscThreadFunc_Tree(void* arg) {
828   PetscErrorCode iterr;
829   int icorr,ierr;
830   int* pId = (int*)arg;
831   int ThreadId = *pId,Mary = 2,i,SubWorker;
832   PetscBool PeeOn;
833   cpu_set_t mset;
834   if (PETSC_PTHREADCLASSES_DEBUG) printf("Thread %d In Tree Thread Function\n",ThreadId);
835   icorr = ThreadCoreAffinity[ThreadId];
836   CPU_ZERO(&mset);
837   CPU_SET(icorr,&mset);
838   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
839 
840   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
841     PeeOn = PETSC_TRUE;
842   }
843   else {
844     PeeOn = PETSC_FALSE;
845   }
846   if(PeeOn==PETSC_FALSE) {
847     /* check your subordinates, wait for them to be ready */
848     for(i=1;i<=Mary;i++) {
849       SubWorker = Mary*ThreadId+i;
850       if(SubWorker<PetscMaxThreads) {
851         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
852         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
853           /* upon entry, automically releases the lock and blocks
854            upon return, has the lock */
855           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
856         }
857         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
858       }
859     }
860     /* your subordinates are now ready */
861   }
862   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
863   /* update your ready status */
864   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
865   if(ThreadId==0) {
866     job_tree.eJobStat = JobCompleted;
867     /* ignal main */
868     ierr = pthread_cond_signal(&main_cond);
869   }
870   else {
871     /* tell your boss that you're ready to work */
872     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
873   }
874   /* the while loop needs to have an exit
875   the 'main' thread can terminate all the threads by performing a broadcast
876    and calling FuncFinish */
877   while(PetscThreadGo) {
878     /*need to check the condition to ensure we don't have to wait
879       waiting when you don't have to causes problems
880      also need to check the condition to ensure proper handling of spurious wakeups */
881     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
882       /* upon entry, automically releases the lock and blocks
883        upon return, has the lock */
884         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
885 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
886 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
887     }
888     if(ThreadId==0) {
889       job_tree.startJob = PETSC_FALSE;
890       job_tree.eJobStat = ThreadsWorking;
891     }
892     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
893     if(PeeOn==PETSC_FALSE) {
894       /* tell your subordinates it's time to get to work */
895       for(i=1; i<=Mary; i++) {
896 	SubWorker = Mary*ThreadId+i;
897         if(SubWorker<PetscMaxThreads) {
898           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
899         }
900       }
901     }
902     /* do your job */
903     if(job_tree.pdata==NULL) {
904       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
905     }
906     else {
907       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
908     }
909     if(iterr!=0) {
910       ithreaderr = 1;
911     }
912     if(PetscThreadGo) {
913       /* reset job, get ready for more */
914       if(PeeOn==PETSC_FALSE) {
915         /* check your subordinates, waiting for them to be ready
916          how do you know for a fact that a given subordinate has actually started? */
917 	for(i=1;i<=Mary;i++) {
918 	  SubWorker = Mary*ThreadId+i;
919           if(SubWorker<PetscMaxThreads) {
920             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
921             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
922               /* upon entry, automically releases the lock and blocks
923                upon return, has the lock */
924               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
925             }
926             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
927           }
928 	}
929         /* your subordinates are now ready */
930       }
931       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
932       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
933       if(ThreadId==0) {
934 	job_tree.eJobStat = JobCompleted; /* oot thread: last thread to complete, guaranteed! */
935         /* root thread signals 'main' */
936         ierr = pthread_cond_signal(&main_cond);
937       }
938       else {
939         /* signal your boss before you go to sleep */
940         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
941       }
942     }
943   }
944   return NULL;
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "PetscThreadInitialize_Tree"
949 void* PetscThreadInitialize_Tree(PetscInt N) {
950   PetscInt i,ierr;
951   int status;
952 
953   if(PetscUseThreadPool) {
954     size_t Val1 = (size_t)CACHE_LINE_SIZE;
955     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
956     arrmutex = (char*)memalign(Val1,Val2);
957     arrcond1 = (char*)memalign(Val1,Val2);
958     arrcond2 = (char*)memalign(Val1,Val2);
959     arrstart = (char*)memalign(Val1,Val2);
960     arrready = (char*)memalign(Val1,Val2);
961     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
962     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
963     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
964     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
965     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
966     /* initialize job structure */
967     for(i=0; i<PetscMaxThreads; i++) {
968       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
969       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
970       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
971       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
972       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
973     }
974     for(i=0; i<PetscMaxThreads; i++) {
975       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
976       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
977       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
978       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
979       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
980     }
981     job_tree.pfunc = NULL;
982     job_tree.pdata = (void**)malloc(N*sizeof(void*));
983     job_tree.startJob = PETSC_FALSE;
984     job_tree.eJobStat = JobInitiated;
985     pVal = (int*)malloc(N*sizeof(int));
986     /* allocate memory in the heap for the thread structure */
987     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
988     /* create threads */
989     for(i=0; i<N; i++) {
990       pVal[i] = i;
991       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
992       /* should check status */
993     }
994   }
995   return NULL;
996 }
997 
998 #undef __FUNCT__
999 #define __FUNCT__ "PetscThreadFinalize_Tree"
1000 PetscErrorCode PetscThreadFinalize_Tree() {
1001   int i,ierr;
1002   void* jstatus;
1003 
1004   PetscFunctionBegin;
1005 
1006   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1007   /* join the threads */
1008   for(i=0; i<PetscMaxThreads; i++) {
1009     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1010     /* do error checking*/
1011   }
1012   free(PetscThreadPoint);
1013   free(arrmutex);
1014   free(arrcond1);
1015   free(arrcond2);
1016   free(arrstart);
1017   free(arrready);
1018   free(job_tree.pdata);
1019   free(pVal);
1020 
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MainWait_Tree"
1026 void MainWait_Tree() {
1027   int ierr;
1028   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1029   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1030     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1031   }
1032   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1033 }
1034 
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MainJob_Tree"
1037 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1038   int i,ierr;
1039   PetscErrorCode ijoberr = 0;
1040 
1041   MainWait();
1042   job_tree.pfunc = pFunc;
1043   job_tree.pdata = data;
1044   job_tree.startJob = PETSC_TRUE;
1045   for(i=0; i<PetscMaxThreads; i++) {
1046     *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1047   }
1048   job_tree.eJobStat = JobInitiated;
1049   ierr = pthread_cond_signal(job_tree.cond2array[0]);
1050   if(pFunc!=FuncFinish) {
1051     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1052   }
1053 
1054   if(ithreaderr) {
1055     ijoberr = ithreaderr;
1056   }
1057   return ijoberr;
1058 }
1059 /****  ****/
1060 
1061 /**** 'Main' Thread Pool Functions ****/
1062 void* PetscThreadFunc_Main(void* arg) {
1063   PetscErrorCode iterr;
1064   int icorr,ierr;
1065   int* pId = (int*)arg;
1066   int ThreadId = *pId;
1067   cpu_set_t mset;
1068   if (PETSC_PTHREADCLASSES_DEBUG) printf("Thread %d In Main Thread Function\n",ThreadId);
1069   icorr = ThreadCoreAffinity[ThreadId];
1070   CPU_ZERO(&mset);
1071   CPU_SET(icorr,&mset);
1072   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1073 
1074   ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1075   /* update your ready status */
1076   *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1077   /* tell the BOSS that you're ready to work before you go to sleep */
1078   ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1079 
1080   /* the while loop needs to have an exit
1081      the 'main' thread can terminate all the threads by performing a broadcast
1082      and calling FuncFinish */
1083   while(PetscThreadGo) {
1084     /* need to check the condition to ensure we don't have to wait
1085        waiting when you don't have to causes problems
1086      also need to check the condition to ensure proper handling of spurious wakeups */
1087     while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
1088       /* upon entry, atomically releases the lock and blocks
1089        upon return, has the lock */
1090         ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
1091 	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
1092     }
1093     ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
1094     if(job_main.pdata==NULL) {
1095       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
1096     }
1097     else {
1098       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
1099     }
1100     if(iterr!=0) {
1101       ithreaderr = 1;
1102     }
1103     if(PetscThreadGo) {
1104       /* reset job, get ready for more */
1105       ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1106       *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1107       /* tell the BOSS that you're ready to work before you go to sleep */
1108       ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1109     }
1110   }
1111   return NULL;
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "PetscThreadInitialize_Main"
1116 void* PetscThreadInitialize_Main(PetscInt N) {
1117   PetscInt i,ierr;
1118   int status;
1119 
1120   if(PetscUseThreadPool) {
1121     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1122     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1123     arrmutex = (char*)memalign(Val1,Val2);
1124     arrcond1 = (char*)memalign(Val1,Val2);
1125     arrcond2 = (char*)memalign(Val1,Val2);
1126     arrstart = (char*)memalign(Val1,Val2);
1127     arrready = (char*)memalign(Val1,Val2);
1128     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1129     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1130     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1131     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1132     /* initialize job structure */
1133     for(i=0; i<PetscMaxThreads; i++) {
1134       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1135       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1136       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1137       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1138     }
1139     for(i=0; i<PetscMaxThreads; i++) {
1140       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1141       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1142       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1143       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1144     }
1145     job_main.pfunc = NULL;
1146     job_main.pdata = (void**)malloc(N*sizeof(void*));
1147     pVal = (int*)malloc(N*sizeof(int));
1148     /* allocate memory in the heap for the thread structure */
1149     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1150     /* create threads */
1151     for(i=0; i<N; i++) {
1152       pVal[i] = i;
1153       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1154       /* error check */
1155     }
1156   }
1157   else {
1158   }
1159   return NULL;
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "PetscThreadFinalize_Main"
1164 PetscErrorCode PetscThreadFinalize_Main() {
1165   int i,ierr;
1166   void* jstatus;
1167 
1168   PetscFunctionBegin;
1169 
1170   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1171   /* join the threads */
1172   for(i=0; i<PetscMaxThreads; i++) {
1173     ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1174   }
1175   free(PetscThreadPoint);
1176   free(arrmutex);
1177   free(arrcond1);
1178   free(arrcond2);
1179   free(arrstart);
1180   free(arrready);
1181   free(job_main.pdata);
1182   free(pVal);
1183 
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MainWait_Main"
1189 void MainWait_Main() {
1190   int i,ierr;
1191   for(i=0; i<PetscMaxThreads; i++) {
1192     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1193     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1194       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1195     }
1196     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1197   }
1198 }
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MainJob_Main"
1202 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1203   int i,ierr;
1204   PetscErrorCode ijoberr = 0;
1205 
1206   MainWait(); /* you know everyone is waiting to be signalled! */
1207   job_main.pfunc = pFunc;
1208   job_main.pdata = data;
1209   for(i=0; i<PetscMaxThreads; i++) {
1210     *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1211   }
1212   /* tell the threads to go to work */
1213   for(i=0; i<PetscMaxThreads; i++) {
1214     ierr = pthread_cond_signal(job_main.cond2array[i]);
1215   }
1216   if(pFunc!=FuncFinish) {
1217     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1218   }
1219 
1220   if(ithreaderr) {
1221     ijoberr = ithreaderr;
1222   }
1223   return ijoberr;
1224 }
1225 /****  ****/
1226 
1227 /**** Chain Thread Functions ****/
1228 void* PetscThreadFunc_Chain(void* arg) {
1229   PetscErrorCode iterr;
1230   int icorr,ierr;
1231   int* pId = (int*)arg;
1232   int ThreadId = *pId;
1233   int SubWorker = ThreadId + 1;
1234   PetscBool PeeOn;
1235   cpu_set_t mset;
1236   if (PETSC_PTHREADCLASSES_DEBUG) printf("Thread %d In Chain Thread Function\n",ThreadId);
1237   icorr = ThreadCoreAffinity[ThreadId];
1238   CPU_ZERO(&mset);
1239   CPU_SET(icorr,&mset);
1240   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1241 
1242   if(ThreadId==(PetscMaxThreads-1)) {
1243     PeeOn = PETSC_TRUE;
1244   }
1245   else {
1246     PeeOn = PETSC_FALSE;
1247   }
1248   if(PeeOn==PETSC_FALSE) {
1249     /* check your subordinate, wait for him to be ready */
1250     ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1251     while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
1252       /* upon entry, automically releases the lock and blocks
1253        upon return, has the lock */
1254       ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1255     }
1256     ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1257     /* your subordinate is now ready*/
1258   }
1259   ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1260   /* update your ready status */
1261   *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1262   if(ThreadId==0) {
1263     job_chain.eJobStat = JobCompleted;
1264     /* signal main */
1265     ierr = pthread_cond_signal(&main_cond);
1266   }
1267   else {
1268     /* tell your boss that you're ready to work */
1269     ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1270   }
1271   /*  the while loop needs to have an exit
1272      the 'main' thread can terminate all the threads by performing a broadcast
1273    and calling FuncFinish */
1274   while(PetscThreadGo) {
1275     /* need to check the condition to ensure we don't have to wait
1276        waiting when you don't have to causes problems
1277      also need to check the condition to ensure proper handling of spurious wakeups */
1278     while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
1279       /*upon entry, automically releases the lock and blocks
1280        upon return, has the lock */
1281         ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
1282 	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
1283 	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
1284     }
1285     if(ThreadId==0) {
1286       job_chain.startJob = PETSC_FALSE;
1287       job_chain.eJobStat = ThreadsWorking;
1288     }
1289     ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
1290     if(PeeOn==PETSC_FALSE) {
1291       /* tell your subworker it's time to get to work */
1292       ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
1293     }
1294     /* do your job */
1295     if(job_chain.pdata==NULL) {
1296       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
1297     }
1298     else {
1299       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
1300     }
1301     if(iterr!=0) {
1302       ithreaderr = 1;
1303     }
1304     if(PetscThreadGo) {
1305       /* reset job, get ready for more */
1306       if(PeeOn==PETSC_FALSE) {
1307         /* check your subordinate, wait for him to be ready
1308          how do you know for a fact that your subordinate has actually started? */
1309         ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1310         while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
1311           /* upon entry, automically releases the lock and blocks
1312            upon return, has the lock */
1313           ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1314         }
1315         ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1316         /* your subordinate is now ready */
1317       }
1318       ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1319       *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1320       if(ThreadId==0) {
1321 	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
1322         /* root thread (foreman) signals 'main' */
1323         ierr = pthread_cond_signal(&main_cond);
1324       }
1325       else {
1326         /* signal your boss before you go to sleep */
1327         ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1328       }
1329     }
1330   }
1331   return NULL;
1332 }
1333 
1334 #undef __FUNCT__
1335 #define __FUNCT__ "PetscThreadInitialize_Chain"
1336 void* PetscThreadInitialize_Chain(PetscInt N) {
1337   PetscInt i,ierr;
1338   int status;
1339 
1340   if(PetscUseThreadPool) {
1341     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1342     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1343     arrmutex = (char*)memalign(Val1,Val2);
1344     arrcond1 = (char*)memalign(Val1,Val2);
1345     arrcond2 = (char*)memalign(Val1,Val2);
1346     arrstart = (char*)memalign(Val1,Val2);
1347     arrready = (char*)memalign(Val1,Val2);
1348     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1349     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1350     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1351     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1352     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1353     /* initialize job structure */
1354     for(i=0; i<PetscMaxThreads; i++) {
1355       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1356       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1357       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1358       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1359       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1360     }
1361     for(i=0; i<PetscMaxThreads; i++) {
1362       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1363       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1364       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1365       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1366       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1367     }
1368     job_chain.pfunc = NULL;
1369     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1370     job_chain.startJob = PETSC_FALSE;
1371     job_chain.eJobStat = JobInitiated;
1372     pVal = (int*)malloc(N*sizeof(int));
1373     /* allocate memory in the heap for the thread structure */
1374     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1375     /* create threads */
1376     for(i=0; i<N; i++) {
1377       pVal[i] = i;
1378       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1379       /* should check error */
1380     }
1381   }
1382   else {
1383   }
1384   return NULL;
1385 }
1386 
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "PetscThreadFinalize_Chain"
1390 PetscErrorCode PetscThreadFinalize_Chain() {
1391   int i,ierr;
1392   void* jstatus;
1393 
1394   PetscFunctionBegin;
1395 
1396   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1397   /* join the threads */
1398   for(i=0; i<PetscMaxThreads; i++) {
1399     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1400     /* should check error */
1401   }
1402   free(PetscThreadPoint);
1403   free(arrmutex);
1404   free(arrcond1);
1405   free(arrcond2);
1406   free(arrstart);
1407   free(arrready);
1408   free(job_chain.pdata);
1409   free(pVal);
1410 
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MainWait_Chain"
1416 void MainWait_Chain() {
1417   int ierr;
1418   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1419   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1420     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1421   }
1422   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MainJob_Chain"
1427 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1428   int i,ierr;
1429   PetscErrorCode ijoberr = 0;
1430 
1431   MainWait();
1432   job_chain.pfunc = pFunc;
1433   job_chain.pdata = data;
1434   job_chain.startJob = PETSC_TRUE;
1435   for(i=0; i<PetscMaxThreads; i++) {
1436     *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1437   }
1438   job_chain.eJobStat = JobInitiated;
1439   ierr = pthread_cond_signal(job_chain.cond2array[0]);
1440   if(pFunc!=FuncFinish) {
1441     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1442   }
1443 
1444   if(ithreaderr) {
1445     ijoberr = ithreaderr;
1446   }
1447   return ijoberr;
1448 }
1449 /****  ****/
1450 
1451 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1452 /**** True Thread Functions ****/
1453 void* PetscThreadFunc_True(void* arg) {
1454   int icorr,ierr,iVal;
1455   int* pId = (int*)arg;
1456   int ThreadId = *pId;
1457   PetscErrorCode iterr;
1458   cpu_set_t mset;
1459   if (PETSC_PTHREADCLASSES_DEBUG) printf("Thread %d In True Pool Thread Function\n",ThreadId);
1460   icorr = ThreadCoreAffinity[ThreadId];
1461   CPU_ZERO(&mset);
1462   CPU_SET(icorr,&mset);
1463   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1464 
1465   ierr = pthread_mutex_lock(&job_true.mutex);
1466   job_true.iNumReadyThreads++;
1467   if(job_true.iNumReadyThreads==PetscMaxThreads) {
1468     ierr = pthread_cond_signal(&main_cond);
1469   }
1470   /*the while loop needs to have an exit
1471     the 'main' thread can terminate all the threads by performing a broadcast
1472    and calling FuncFinish */
1473   while(PetscThreadGo) {
1474     /*need to check the condition to ensure we don't have to wait
1475       waiting when you don't have to causes problems
1476      also need to wait if another thread sneaks in and messes with the predicate */
1477     while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
1478       /* upon entry, automically releases the lock and blocks
1479        upon return, has the lock */
1480       if (PETSC_PTHREADCLASSES_DEBUG) printf("Thread %d Going to Sleep!\n",ThreadId);
1481       ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
1482     }
1483     job_true.startJob = PETSC_FALSE;
1484     job_true.iNumJobThreads--;
1485     job_true.iNumReadyThreads--;
1486     iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
1487     pthread_mutex_unlock(&job_true.mutex);
1488     if(job_true.pdata==NULL) {
1489       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
1490     }
1491     else {
1492       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
1493     }
1494     if(iterr!=0) {
1495       ithreaderr = 1;
1496     }
1497     if (PETSC_PTHREADCLASSES_DEBUG) printf("Thread %d Finished Job\n",ThreadId);
1498     /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
1499       what happens if a thread finishes before they all start? BAD!
1500      what happens if a thread finishes before any else start? BAD! */
1501     pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
1502     /* reset job */
1503     if(PetscThreadGo) {
1504       pthread_mutex_lock(&job_true.mutex);
1505       job_true.iNumReadyThreads++;
1506       if(job_true.iNumReadyThreads==PetscMaxThreads) {
1507 	/* signal the 'main' thread that the job is done! (only done once) */
1508 	ierr = pthread_cond_signal(&main_cond);
1509       }
1510     }
1511   }
1512   return NULL;
1513 }
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "PetscThreadInitialize_True"
1517 void* PetscThreadInitialize_True(PetscInt N) {
1518   PetscInt i;
1519   int status;
1520 
1521   pVal = (int*)malloc(N*sizeof(int));
1522   /* allocate memory in the heap for the thread structure */
1523   PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1524   BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1525   job_true.pdata = (void**)malloc(N*sizeof(void*));
1526   for(i=0; i<N; i++) {
1527     pVal[i] = i;
1528     status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1529     /* error check to ensure proper thread creation */
1530     status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1531     /* should check error */
1532   }
1533   if (PETSC_PTHREADCLASSES_DEBUG) printf("Finished True Thread Pool Initialization\n");
1534   return NULL;
1535 }
1536 
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "PetscThreadFinalize_True"
1540 PetscErrorCode PetscThreadFinalize_True() {
1541   int i,ierr;
1542   void* jstatus;
1543 
1544   PetscFunctionBegin;
1545 
1546   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1547   /* join the threads */
1548   for(i=0; i<PetscMaxThreads; i++) {
1549     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1550   }
1551   free(BarrPoint);
1552   free(PetscThreadPoint);
1553 
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MainWait_True"
1559 void MainWait_True() {
1560   int ierr;
1561   ierr = pthread_mutex_lock(&job_true.mutex);
1562   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1563     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1564   }
1565   ierr = pthread_mutex_unlock(&job_true.mutex);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MainJob_True"
1570 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1571   int ierr;
1572   PetscErrorCode ijoberr = 0;
1573 
1574   MainWait();
1575   job_true.pfunc = pFunc;
1576   job_true.pdata = data;
1577   job_true.pbarr = &BarrPoint[n];
1578   job_true.iNumJobThreads = n;
1579   job_true.startJob = PETSC_TRUE;
1580   ierr = pthread_cond_broadcast(&job_true.cond);
1581   if(pFunc!=FuncFinish) {
1582     MainWait(); /* why wait after? guarantees that job gets done */
1583   }
1584 
1585   if(ithreaderr) {
1586     ijoberr = ithreaderr;
1587   }
1588   return ijoberr;
1589 }
1590 /**** NO THREAD POOL FUNCTION ****/
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MainJob_Spawn"
1593 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1594   PetscErrorCode ijoberr = 0;
1595 
1596   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1597   PetscThreadPoint = apThread; /* point to same place */
1598   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1599   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1600   free(apThread);
1601 
1602   return ijoberr;
1603 }
1604 /****  ****/
1605 #endif
1606 
1607 void* FuncFinish(void* arg) {
1608   PetscThreadGo = PETSC_FALSE;
1609   return(0);
1610 }
1611 
1612 #endif
1613