xref: /petsc/src/sys/objects/init.c (revision ef386f4b9e9a5f76b306de1e9a9e07c881c3254b)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #include <petscsys.h>        /*I  "petscsys.h"   I*/
10 
11 #if defined(PETSC_USE_PTHREAD)
12 #ifndef _GNU_SOURCE
13 #define _GNU_SOURCE
14 #endif
15 #if defined(PETSC_HAVE_SCHED_H)
16 #include <sched.h>
17 #endif
18 #include <pthread.h>
19 #endif
20 
21 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
22 #include <sys/sysinfo.h>
23 #endif
24 #include <unistd.h>
25 #if defined(PETSC_HAVE_STDLIB_H)
26 #include <stdlib.h>
27 #endif
28 #if defined(PETSC_HAVE_MALLOC_H)
29 #include <malloc.h>
30 #endif
31 #if defined(PETSC_HAVE_VALGRIND)
32 #include <valgrind/valgrind.h>
33 #endif
34 
35 /* ------------------------Nasty global variables -------------------------------*/
36 /*
37      Indicates if PETSc started up MPI, or it was
38    already started before PETSc was initialized.
39 */
40 PetscBool    PetscBeganMPI         = PETSC_FALSE;
41 PetscBool    PetscInitializeCalled = PETSC_FALSE;
42 PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
43 PetscBool    PetscUseThreadPool    = PETSC_FALSE;
44 PetscBool    PetscThreadGo         = PETSC_TRUE;
45 PetscMPIInt  PetscGlobalRank = -1;
46 PetscMPIInt  PetscGlobalSize = -1;
47 
48 #if defined(PETSC_HAVE_PTHREADCLASSES)
49 PetscMPIInt  PetscMaxThreads = 2;
50 pthread_t*   PetscThreadPoint;
51 #define PETSC_HAVE_PTHREAD_BARRIER
52 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
53 pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
54 #endif
55 PetscErrorCode ithreaderr = 0;
56 int*         pVal;
57 
58 #define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
59 int* ThreadCoreAffinity;
60 
61 typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */
62 
63 typedef struct {
64   pthread_mutex_t** mutexarray;
65   pthread_cond_t**  cond1array;
66   pthread_cond_t** cond2array;
67   void* (*pfunc)(void*);
68   void** pdata;
69   PetscBool startJob;
70   estat eJobStat;
71   PetscBool** arrThreadStarted;
72   PetscBool** arrThreadReady;
73 } sjob_tree;
74 sjob_tree job_tree;
75 typedef struct {
76   pthread_mutex_t** mutexarray;
77   pthread_cond_t**  cond1array;
78   pthread_cond_t** cond2array;
79   void* (*pfunc)(void*);
80   void** pdata;
81   PetscBool** arrThreadReady;
82 } sjob_main;
83 sjob_main job_main;
84 typedef struct {
85   pthread_mutex_t** mutexarray;
86   pthread_cond_t**  cond1array;
87   pthread_cond_t** cond2array;
88   void* (*pfunc)(void*);
89   void** pdata;
90   PetscBool startJob;
91   estat eJobStat;
92   PetscBool** arrThreadStarted;
93   PetscBool** arrThreadReady;
94 } sjob_chain;
95 sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/*
   Job bookkeeping structure; judging by its name it belongs to the 'true' thread pool.
   Unlike the other job structures it holds its mutex and condition variable by value.
*/
typedef struct {
  pthread_mutex_t   mutex;
  pthread_cond_t    cond;
  void              *(*pfunc)(void*);
  void              **pdata;
  pthread_barrier_t *pbarr;
  int               iNumJobThreads;
  int               iNumReadyThreads;
  PetscBool         startJob;
} sjob_true;

/* Initializers are positional and follow the member order above */
sjob_true job_true = {
  PTHREAD_MUTEX_INITIALIZER,
  PTHREAD_COND_INITIALIZER,
  NULL,
  NULL,
  NULL,
  0,
  0,
  PETSC_FALSE
};
#endif
109 
/* used by 'true', 'chain','tree' thread pools */
pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER;

/* Raw byte buffers; each one is used by 'chain','main','tree' thread pools */
char *arrmutex;
char *arrcond1;
char *arrcond2;
char *arrstart;
char *arrready;
116 
117 /* Function Pointers */
118 void*          (*PetscThreadFunc)(void*) = NULL;
119 void*          (*PetscThreadInitialize)(PetscInt) = NULL;
120 PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
121 void           (*MainWait)(void) = NULL;
122 PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
123 /**** Tree Thread Pool Functions ****/
124 void*          PetscThreadFunc_Tree(void*);
125 void*          PetscThreadInitialize_Tree(PetscInt);
126 PetscErrorCode PetscThreadFinalize_Tree(void);
127 void           MainWait_Tree(void);
128 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
129 /**** Main Thread Pool Functions ****/
130 void*          PetscThreadFunc_Main(void*);
131 void*          PetscThreadInitialize_Main(PetscInt);
132 PetscErrorCode PetscThreadFinalize_Main(void);
133 void           MainWait_Main(void);
134 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
135 /**** Chain Thread Pool Functions ****/
136 void*          PetscThreadFunc_Chain(void*);
137 void*          PetscThreadInitialize_Chain(PetscInt);
138 PetscErrorCode PetscThreadFinalize_Chain(void);
139 void           MainWait_Chain(void);
140 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
141 /**** True Thread Pool Functions ****/
142 void*          PetscThreadFunc_True(void*);
143 void*          PetscThreadInitialize_True(PetscInt);
144 PetscErrorCode PetscThreadFinalize_True(void);
145 void           MainWait_True(void);
146 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
147 /**** NO Thread Pool Function  ****/
148 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);
149 /****  ****/
150 void* FuncFinish(void*);
151 void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
152 void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
153 #endif
154 
155 #if defined(PETSC_USE_COMPLEX)
156 #if defined(PETSC_COMPLEX_INSTANTIATE)
157 template <> class std::complex<double>; /* instantiate complex template class */
158 #endif
159 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
160 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
161 MPI_Datatype   MPI_C_COMPLEX;
162 #endif
163 PetscScalar    PETSC_i;
164 #else
165 PetscScalar    PETSC_i = 0.0;
166 #endif
167 #if defined(PETSC_USE_REAL___FLOAT128)
168 MPI_Datatype   MPIU___FLOAT128 = 0;
169 #endif
170 MPI_Datatype   MPIU_2SCALAR = 0;
171 MPI_Datatype   MPIU_2INT = 0;
172 
173 /*
174      These are needed by petscbt.h
175 */
176 #include <petscbt.h>
177 char      _BT_mask = ' ';
178 char      _BT_c = ' ';
179 PetscInt  _BT_idx  = 0;
180 
181 /*
182        Function that is called to display all error messages
183 */
184 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
185 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
186 #if defined(PETSC_HAVE_MATLAB_ENGINE)
187 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
188 #else
189 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
190 #endif
191 /*
192   This is needed to turn on/off cusp synchronization */
193 PetscBool   synchronizeCUSP = PETSC_FALSE;
194 
195 /* ------------------------------------------------------------------------------*/
196 /*
197    Optional file where all PETSc output from various prints is saved
198 */
199 FILE *petsc_history = PETSC_NULL;
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "PetscOpenHistoryFile"
203 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
204 {
205   PetscErrorCode ierr;
206   PetscMPIInt    rank,size;
207   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
208   char           version[256];
209 
210   PetscFunctionBegin;
211   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
212   if (!rank) {
213     char        arch[10];
214     int         err;
215     PetscViewer viewer;
216 
217     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
218     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
219     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
220     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
221     if (filename) {
222       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
223     } else {
224       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
225       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
226       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
227     }
228 
229     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
230     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
231     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
232     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
233     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
234     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
235     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
236     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
237     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
238     err = fflush(*fd);
239     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
240   }
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "PetscCloseHistoryFile"
246 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
247 {
248   PetscErrorCode ierr;
249   PetscMPIInt    rank;
250   char           date[64];
251   int            err;
252 
253   PetscFunctionBegin;
254   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
257     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
258     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
259     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
260     err = fflush(*fd);
261     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
262     err = fclose(*fd);
263     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
264   }
265   PetscFunctionReturn(0);
266 }
267 
268 /* ------------------------------------------------------------------------------*/
269 
/*
   This is ugly and probably belongs somewhere else, but I want to be able
   to install a true MPI abort error handler from the command line arguments.

   The point is that MPI errors in the debugger should leave all the stack frames
   in place. The default MPI_Abort() cleans up and exits, thus providing no useful
   information in the debugger; hence we call abort() instead of MPI_Abort().
*/
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "Petsc_MPI_AbortOnError"
281 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
282 {
283   PetscFunctionBegin;
284   (*PetscErrorPrintf)("MPI error %d\n",*flag);
285   abort();
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
290 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
291 {
292   PetscErrorCode ierr;
293 
294   PetscFunctionBegin;
295   (*PetscErrorPrintf)("MPI error %d\n",*flag);
296   ierr = PetscAttachDebugger();
297   if (ierr) { /* hopeless so get out */
298     MPI_Abort(*comm,*flag);
299   }
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "PetscEnd"
304 /*@C
305    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
306      wishes a clean exit somewhere deep in the program.
307 
308    Collective on PETSC_COMM_WORLD
309 
310    Options Database Keys are the same as for PetscFinalize()
311 
312    Level: advanced
313 
314    Note:
315    See PetscInitialize() for more general runtime options.
316 
317 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
318 @*/
319 PetscErrorCode  PetscEnd(void)
320 {
321   PetscFunctionBegin;
322   PetscFinalize();
323   exit(0);
324   return 0;
325 }
326 
327 PetscBool    PetscOptionsPublish = PETSC_FALSE;
328 extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
329 extern PetscBool  petscsetmallocvisited;
330 static char       emacsmachinename[256];
331 
332 PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
333 PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "PetscSetHelpVersionFunctions"
337 /*@C
338    PetscSetHelpVersionFunctions - Sets functions that print help and version information
339    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
340    This routine enables a "higher-level" package that uses PETSc to print its messages first.
341 
342    Input Parameter:
343 +  help - the help function (may be PETSC_NULL)
344 -  version - the version function (may be PETSC_NULL)
345 
346    Level: developer
347 
348    Concepts: package help message
349 
350 @*/
351 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
352 {
353   PetscFunctionBegin;
354   PetscExternalHelpFunction    = help;
355   PetscExternalVersionFunction = version;
356   PetscFunctionReturn(0);
357 }
358 
359 #undef __FUNCT__
360 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
361 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
362 {
363   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
364   MPI_Comm       comm = PETSC_COMM_WORLD;
365   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
366   PetscErrorCode ierr;
367   PetscReal      si;
368   int            i;
369   PetscMPIInt    rank;
370   char           version[256];
371 
372   PetscFunctionBegin;
373   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
374 
375   /*
376       Setup the memory management; support for tracing malloc() usage
377   */
378   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
379 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
380   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
381   if ((!flg2 || flg1) && !petscsetmallocvisited) {
382 #if defined(PETSC_HAVE_VALGRIND)
383     if (flg2 || !(RUNNING_ON_VALGRIND)) {
384       /* turn off default -malloc if valgrind is being used */
385 #endif
386       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
387 #if defined(PETSC_HAVE_VALGRIND)
388     }
389 #endif
390   }
391 #else
392   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
393   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
394   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
395 #endif
396   if (flg3) {
397     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
398   }
399   flg1 = PETSC_FALSE;
400   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
401   if (flg1) {
402     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
403     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
404   }
405 
406   flg1 = PETSC_FALSE;
407   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
408   if (!flg1) {
409     flg1 = PETSC_FALSE;
410     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
411   }
412   if (flg1) {
413     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
414   }
415 
416   /*
417       Set the display variable for graphics
418   */
419   ierr = PetscSetDisplay();CHKERRQ(ierr);
420 
421 
422   /*
423       Print the PETSc version information
424   */
425   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
426   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
427   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
428   if (flg1 || flg2 || flg3){
429 
430     /*
431        Print "higher-level" package version message
432     */
433     if (PetscExternalVersionFunction) {
434       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
435     }
436 
437     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
439 ------------------------------\n");CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
441     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
442     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
443     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
444     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
445     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
446     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
447 ------------------------------\n");CHKERRQ(ierr);
448   }
449 
450   /*
451        Print "higher-level" package help message
452   */
453   if (flg3){
454     if (PetscExternalHelpFunction) {
455       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
456     }
457   }
458 
459   /*
460       Setup the error handling
461   */
462   flg1 = PETSC_FALSE;
463   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
464   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
465   flg1 = PETSC_FALSE;
466   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
467   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
468   flg1 = PETSC_FALSE;
469   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
470   if (flg1) {
471     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
472   }
473   flg1 = PETSC_FALSE;
474   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
475   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
476   flg1 = PETSC_FALSE;
477   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
478   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
479 
480   /*
481       Setup debugger information
482   */
483   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
484   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
485   if (flg1) {
486     MPI_Errhandler err_handler;
487 
488     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
489     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
490     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
491     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
492   }
493   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
494   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
495   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
496   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
497   if (flg1 || flg2) {
498     PetscMPIInt    size;
499     PetscInt       lsize,*nodes;
500     MPI_Errhandler err_handler;
501     /*
502        we have to make sure that all processors have opened
503        connections to all other processors, otherwise once the
504        debugger has stated it is likely to receive a SIGUSR1
505        and kill the program.
506     */
507     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
508     if (size > 2) {
509       PetscMPIInt dummy = 0;
510       MPI_Status  status;
511       for (i=0; i<size; i++) {
512         if (rank != i) {
513           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
514         }
515       }
516       for (i=0; i<size; i++) {
517         if (rank != i) {
518           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
519         }
520       }
521     }
522     /* check if this processor node should be in debugger */
523     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
524     lsize = size;
525     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
526     if (flag) {
527       for (i=0; i<lsize; i++) {
528         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
529       }
530     }
531     if (!flag) {
532       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
533       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
534       if (flg1) {
535         ierr = PetscAttachDebugger();CHKERRQ(ierr);
536       } else {
537         ierr = PetscStopForDebugger();CHKERRQ(ierr);
538       }
539       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
540       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
541     }
542     ierr = PetscFree(nodes);CHKERRQ(ierr);
543   }
544 
545   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
546   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
547 
548 #if defined(PETSC_USE_SOCKET_VIEWER)
549   /*
550     Activates new sockets for zope if needed
551   */
552   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
553   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
554   if (flgz){
555     int  sockfd;
556     char hostname[256];
557     char username[256];
558     int  remoteport = 9999;
559 
560     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
561     if (!hostname[0]){
562       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
563     }
564     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
565     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
566     PETSC_ZOPEFD = fdopen(sockfd, "w");
567     if (flgzout){
568       PETSC_STDOUT = PETSC_ZOPEFD;
569       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
570       fprintf(PETSC_STDOUT, "<<<start>>>");
571     } else {
572       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
573       fprintf(PETSC_ZOPEFD, "<<<start>>>");
574     }
575   }
576 #endif
577 #if defined(PETSC_USE_SERVER)
578   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
579   if (flgz){
580     PetscInt port = PETSC_DECIDE;
581     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
582     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
583   }
584 #endif
585 
586   /*
587         Setup profiling and logging
588   */
589 #if defined (PETSC_USE_INFO)
590   {
591     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
592     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
593     if (flg1 && logname[0]) {
594       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
595     } else if (flg1) {
596       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
597     }
598   }
599 #endif
600 #if defined(PETSC_USE_LOG)
601   mname[0] = 0;
602   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
603   if (flg1) {
604     if (mname[0]) {
605       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
606     } else {
607       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
608     }
609   }
610 #if defined(PETSC_HAVE_MPE)
611   flg1 = PETSC_FALSE;
612   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
613   if (flg1) PetscLogMPEBegin();
614 #endif
615   flg1 = PETSC_FALSE;
616   flg2 = PETSC_FALSE;
617   flg3 = PETSC_FALSE;
618   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
619   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
620   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
621   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
622   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
623   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
624 
625   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
626   if (flg1) {
627     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
628     FILE *file;
629     if (mname[0]) {
630       sprintf(name,"%s.%d",mname,rank);
631       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
632       file = fopen(fname,"w");
633       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
634     } else {
635       file = PETSC_STDOUT;
636     }
637     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
638   }
639 #endif
640 
641   /*
642       Setup building of stack frames for all function calls
643   */
644 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
645   ierr = PetscStackCreate();CHKERRQ(ierr);
646 #endif
647 
648   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
649 
650 #if defined(PETSC_HAVE_PTHREADCLASSES)
651   /*
652       Determine whether user specified maximum number of threads
653    */
654   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
655 
656   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
657   if(flg1) {
658     cpu_set_t mset;
659     int icorr,ncorr = get_nprocs();
660     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
661     CPU_ZERO(&mset);
662     CPU_SET(icorr%ncorr,&mset);
663     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
664   }
665 
666   PetscInt N_CORES = get_nprocs();
667   ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
668   char tstr[9];
669   char tbuf[2];
670   strcpy(tstr,"-thread");
671   for(i=0;i<PetscMaxThreads;i++) {
672     ThreadCoreAffinity[i] = i;
673     sprintf(tbuf,"%d",i);
674     strcat(tstr,tbuf);
675     ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
676     if(flg1) {
677       ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
678       ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
679     }
680     tstr[7] = '\0';
681   }
682 
683   /*
684       Determine whether to use thread pool
685    */
686   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
687   if (flg1) {
688     PetscUseThreadPool = PETSC_TRUE;
689     /* get the thread pool type */
690     PetscInt ipool = 0;
691     const char *choices[4] = {"true","tree","main","chain"};
692 
693     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
694     switch(ipool) {
695     case 1:
696       PetscThreadFunc       = &PetscThreadFunc_Tree;
697       PetscThreadInitialize = &PetscThreadInitialize_Tree;
698       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
699       MainWait              = &MainWait_Tree;
700       MainJob               = &MainJob_Tree;
701       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
702       break;
703     case 2:
704       PetscThreadFunc       = &PetscThreadFunc_Main;
705       PetscThreadInitialize = &PetscThreadInitialize_Main;
706       PetscThreadFinalize   = &PetscThreadFinalize_Main;
707       MainWait              = &MainWait_Main;
708       MainJob               = &MainJob_Main;
709       PetscInfo(PETSC_NULL,"Using main thread pool\n");
710       break;
711 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
712     case 3:
713 #else
714     default:
715 #endif
716       PetscThreadFunc       = &PetscThreadFunc_Chain;
717       PetscThreadInitialize = &PetscThreadInitialize_Chain;
718       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
719       MainWait              = &MainWait_Chain;
720       MainJob               = &MainJob_Chain;
721       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
722       break;
723 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
724     default:
725       PetscThreadFunc       = &PetscThreadFunc_True;
726       PetscThreadInitialize = &PetscThreadInitialize_True;
727       PetscThreadFinalize   = &PetscThreadFinalize_True;
728       MainWait              = &MainWait_True;
729       MainJob               = &MainJob_True;
730       PetscInfo(PETSC_NULL,"Using true thread pool\n");
731       break;
732 #endif
733     }
734     PetscThreadInitialize(PetscMaxThreads);
735   } else {
736     //need to define these in the case on 'no threads' or 'thread create/destroy'
737     //could take any of the above versions
738     MainJob               = &MainJob_Spawn;
739   }
740 #endif
741   /*
742        Print basic help message
743   */
744   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
745   if (flg1) {
746     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
777     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
778     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
779     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
780 #if defined(PETSC_USE_LOG)
781     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
782     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
783     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
784 #if defined(PETSC_HAVE_MPE)
785     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
786 #endif
787     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
788 #endif
789     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
790     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
791     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
792     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
793   }
794 
795   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
796   if (flg1) {
797     ierr = PetscSleep(si);CHKERRQ(ierr);
798   }
799 
800   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
801   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
802   if (f) {
803     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
804   }
805 
806 #if defined(PETSC_HAVE_CUSP)
807   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
808   if (flg3) flg1 = PETSC_TRUE;
809   else flg1 = PETSC_FALSE;
810   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
811   if (flg1) synchronizeCUSP = PETSC_TRUE;
812 #endif
813 
814   PetscFunctionReturn(0);
815 }
816 
817 #if defined(PETSC_HAVE_PTHREADCLASSES)
818 
819 /**** 'Tree' Thread Pool Functions ****/
820 void* PetscThreadFunc_Tree(void* arg) {
821   PetscErrorCode iterr;
822   int icorr,ierr;
823   int* pId = (int*)arg;
824   int ThreadId = *pId,Mary = 2,i,SubWorker;
825   PetscBool PeeOn;
826   cpu_set_t mset;
827   //printf("Thread %d In Tree Thread Function\n",ThreadId);
828   icorr = ThreadCoreAffinity[ThreadId];
829   CPU_ZERO(&mset);
830   CPU_SET(icorr,&mset);
831   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
832 
833   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
834     PeeOn = PETSC_TRUE;
835   }
836   else {
837     PeeOn = PETSC_FALSE;
838   }
839   if(PeeOn==PETSC_FALSE) {
840     /* check your subordinates, wait for them to be ready */
841     for(i=1;i<=Mary;i++) {
842       SubWorker = Mary*ThreadId+i;
843       if(SubWorker<PetscMaxThreads) {
844         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
845         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
846           /* upon entry, automically releases the lock and blocks
847            upon return, has the lock */
848           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
849         }
850         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
851       }
852     }
853     /* your subordinates are now ready */
854   }
855   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
856   /* update your ready status */
857   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
858   if(ThreadId==0) {
859     job_tree.eJobStat = JobCompleted;
860     /* ignal main */
861     ierr = pthread_cond_signal(&main_cond);
862   }
863   else {
864     /* tell your boss that you're ready to work */
865     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
866   }
867   /* the while loop needs to have an exit
868   the 'main' thread can terminate all the threads by performing a broadcast
869    and calling FuncFinish */
870   while(PetscThreadGo) {
871     /*need to check the condition to ensure we don't have to wait
872       waiting when you don't have to causes problems
873      also need to check the condition to ensure proper handling of spurious wakeups */
874     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
875       /* upon entry, automically releases the lock and blocks
876        upon return, has the lock */
877         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
878 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
879 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
880     }
881     if(ThreadId==0) {
882       job_tree.startJob = PETSC_FALSE;
883       job_tree.eJobStat = ThreadsWorking;
884     }
885     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
886     if(PeeOn==PETSC_FALSE) {
887       /* tell your subordinates it's time to get to work */
888       for(i=1; i<=Mary; i++) {
889 	SubWorker = Mary*ThreadId+i;
890         if(SubWorker<PetscMaxThreads) {
891           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
892         }
893       }
894     }
895     /* do your job */
896     if(job_tree.pdata==NULL) {
897       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
898     }
899     else {
900       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
901     }
902     if(iterr!=0) {
903       ithreaderr = 1;
904     }
905     if(PetscThreadGo) {
906       /* reset job, get ready for more */
907       if(PeeOn==PETSC_FALSE) {
908         /* check your subordinates, waiting for them to be ready
909          how do you know for a fact that a given subordinate has actually started? */
910 	for(i=1;i<=Mary;i++) {
911 	  SubWorker = Mary*ThreadId+i;
912           if(SubWorker<PetscMaxThreads) {
913             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
914             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
915               /* upon entry, automically releases the lock and blocks
916                upon return, has the lock */
917               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
918             }
919             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
920           }
921 	}
922         /* your subordinates are now ready */
923       }
924       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
925       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
926       if(ThreadId==0) {
927 	job_tree.eJobStat = JobCompleted; /* oot thread: last thread to complete, guaranteed! */
928         /* root thread signals 'main' */
929         ierr = pthread_cond_signal(&main_cond);
930       }
931       else {
932         /* signal your boss before you go to sleep */
933         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
934       }
935     }
936   }
937   return NULL;
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "PetscThreadInitialize_Tree"
942 void* PetscThreadInitialize_Tree(PetscInt N) {
943   PetscInt i,ierr;
944   int status;
945 
946   if(PetscUseThreadPool) {
947     size_t Val1 = (size_t)CACHE_LINE_SIZE;
948     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
949     arrmutex = (char*)memalign(Val1,Val2);
950     arrcond1 = (char*)memalign(Val1,Val2);
951     arrcond2 = (char*)memalign(Val1,Val2);
952     arrstart = (char*)memalign(Val1,Val2);
953     arrready = (char*)memalign(Val1,Val2);
954     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
955     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
956     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
957     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
958     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
959     /* initialize job structure */
960     for(i=0; i<PetscMaxThreads; i++) {
961       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
962       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
963       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
964       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
965       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
966     }
967     for(i=0; i<PetscMaxThreads; i++) {
968       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
969       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
970       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
971       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
972       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
973     }
974     job_tree.pfunc = NULL;
975     job_tree.pdata = (void**)malloc(N*sizeof(void*));
976     job_tree.startJob = PETSC_FALSE;
977     job_tree.eJobStat = JobInitiated;
978     pVal = (int*)malloc(N*sizeof(int));
979     /* allocate memory in the heap for the thread structure */
980     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
981     /* create threads */
982     for(i=0; i<N; i++) {
983       pVal[i] = i;
984       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
985       /* should check status */
986     }
987   }
988   return NULL;
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "PetscThreadFinalize_Tree"
993 PetscErrorCode PetscThreadFinalize_Tree() {
994   int i,ierr;
995   void* jstatus;
996 
997   PetscFunctionBegin;
998 
999   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1000   /* join the threads */
1001   for(i=0; i<PetscMaxThreads; i++) {
1002     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1003     /* do error checking*/
1004   }
1005   free(PetscThreadPoint);
1006   free(arrmutex);
1007   free(arrcond1);
1008   free(arrcond2);
1009   free(arrstart);
1010   free(arrready);
1011   free(job_tree.pdata);
1012   free(pVal);
1013 
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MainWait_Tree"
1019 void MainWait_Tree() {
1020   int ierr;
1021   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1022   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1023     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1024   }
1025   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MainJob_Tree"
1030 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1031   int i,ierr;
1032   PetscErrorCode ijoberr = 0;
1033 
1034   MainWait();
1035   job_tree.pfunc = pFunc;
1036   job_tree.pdata = data;
1037   job_tree.startJob = PETSC_TRUE;
1038   for(i=0; i<PetscMaxThreads; i++) {
1039     *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1040   }
1041   job_tree.eJobStat = JobInitiated;
1042   ierr = pthread_cond_signal(job_tree.cond2array[0]);
1043   if(pFunc!=FuncFinish) {
1044     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1045   }
1046 
1047   if(ithreaderr) {
1048     ijoberr = ithreaderr;
1049   }
1050   return ijoberr;
1051 }
1052 /****  ****/
1053 
1054 /**** 'Main' Thread Pool Functions ****/
1055 void* PetscThreadFunc_Main(void* arg) {
1056   PetscErrorCode iterr;
1057   int icorr,ierr;
1058   int* pId = (int*)arg;
1059   int ThreadId = *pId;
1060   cpu_set_t mset;
1061   //printf("Thread %d In Main Thread Function\n",ThreadId);
1062   icorr = ThreadCoreAffinity[ThreadId];
1063   CPU_ZERO(&mset);
1064   CPU_SET(icorr,&mset);
1065   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1066 
1067   ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1068   /* update your ready status */
1069   *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1070   /* tell the BOSS that you're ready to work before you go to sleep */
1071   ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1072 
1073   /* the while loop needs to have an exit
1074      the 'main' thread can terminate all the threads by performing a broadcast
1075      and calling FuncFinish */
1076   while(PetscThreadGo) {
1077     /* need to check the condition to ensure we don't have to wait
1078        waiting when you don't have to causes problems
1079      also need to check the condition to ensure proper handling of spurious wakeups */
1080     while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
1081       /* upon entry, atomically releases the lock and blocks
1082        upon return, has the lock */
1083         ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
1084 	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
1085     }
1086     ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
1087     if(job_main.pdata==NULL) {
1088       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
1089     }
1090     else {
1091       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
1092     }
1093     if(iterr!=0) {
1094       ithreaderr = 1;
1095     }
1096     if(PetscThreadGo) {
1097       /* reset job, get ready for more */
1098       ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1099       *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1100       /* tell the BOSS that you're ready to work before you go to sleep */
1101       ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1102     }
1103   }
1104   return NULL;
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "PetscThreadInitialize_Main"
1109 void* PetscThreadInitialize_Main(PetscInt N) {
1110   PetscInt i,ierr;
1111   int status;
1112 
1113   if(PetscUseThreadPool) {
1114     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1115     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1116     arrmutex = (char*)memalign(Val1,Val2);
1117     arrcond1 = (char*)memalign(Val1,Val2);
1118     arrcond2 = (char*)memalign(Val1,Val2);
1119     arrstart = (char*)memalign(Val1,Val2);
1120     arrready = (char*)memalign(Val1,Val2);
1121     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1122     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1123     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1124     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1125     /* initialize job structure */
1126     for(i=0; i<PetscMaxThreads; i++) {
1127       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1128       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1129       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1130       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1131     }
1132     for(i=0; i<PetscMaxThreads; i++) {
1133       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1134       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1135       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1136       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1137     }
1138     job_main.pfunc = NULL;
1139     job_main.pdata = (void**)malloc(N*sizeof(void*));
1140     pVal = (int*)malloc(N*sizeof(int));
1141     /* allocate memory in the heap for the thread structure */
1142     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1143     /* create threads */
1144     for(i=0; i<N; i++) {
1145       pVal[i] = i;
1146       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1147       /* error check */
1148     }
1149   }
1150   else {
1151   }
1152   return NULL;
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "PetscThreadFinalize_Main"
1157 PetscErrorCode PetscThreadFinalize_Main() {
1158   int i,ierr;
1159   void* jstatus;
1160 
1161   PetscFunctionBegin;
1162 
1163   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1164   /* join the threads */
1165   for(i=0; i<PetscMaxThreads; i++) {
1166     ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1167   }
1168   free(PetscThreadPoint);
1169   free(arrmutex);
1170   free(arrcond1);
1171   free(arrcond2);
1172   free(arrstart);
1173   free(arrready);
1174   free(job_main.pdata);
1175   free(pVal);
1176 
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MainWait_Main"
1182 void MainWait_Main() {
1183   int i,ierr;
1184   for(i=0; i<PetscMaxThreads; i++) {
1185     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1186     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1187       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1188     }
1189     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1190   }
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MainJob_Main"
1195 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1196   int i,ierr;
1197   PetscErrorCode ijoberr = 0;
1198 
1199   MainWait(); /* you know everyone is waiting to be signalled! */
1200   job_main.pfunc = pFunc;
1201   job_main.pdata = data;
1202   for(i=0; i<PetscMaxThreads; i++) {
1203     *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1204   }
1205   /* tell the threads to go to work */
1206   for(i=0; i<PetscMaxThreads; i++) {
1207     ierr = pthread_cond_signal(job_main.cond2array[i]);
1208   }
1209   if(pFunc!=FuncFinish) {
1210     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1211   }
1212 
1213   if(ithreaderr) {
1214     ijoberr = ithreaderr;
1215   }
1216   return ijoberr;
1217 }
1218 /****  ****/
1219 
1220 /**** Chain Thread Functions ****/
1221 void* PetscThreadFunc_Chain(void* arg) {
1222   PetscErrorCode iterr;
1223   int icorr,ierr;
1224   int* pId = (int*)arg;
1225   int ThreadId = *pId;
1226   int SubWorker = ThreadId + 1;
1227   PetscBool PeeOn;
1228   cpu_set_t mset;
1229   //printf("Thread %d In Chain Thread Function\n",ThreadId);
1230   icorr = ThreadCoreAffinity[ThreadId];
1231   CPU_ZERO(&mset);
1232   CPU_SET(icorr,&mset);
1233   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1234 
1235   if(ThreadId==(PetscMaxThreads-1)) {
1236     PeeOn = PETSC_TRUE;
1237   }
1238   else {
1239     PeeOn = PETSC_FALSE;
1240   }
1241   if(PeeOn==PETSC_FALSE) {
1242     /* check your subordinate, wait for him to be ready */
1243     ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1244     while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
1245       /* upon entry, automically releases the lock and blocks
1246        upon return, has the lock */
1247       ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1248     }
1249     ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1250     /* your subordinate is now ready*/
1251   }
1252   ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1253   /* update your ready status */
1254   *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1255   if(ThreadId==0) {
1256     job_chain.eJobStat = JobCompleted;
1257     /* signal main */
1258     ierr = pthread_cond_signal(&main_cond);
1259   }
1260   else {
1261     /* tell your boss that you're ready to work */
1262     ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1263   }
1264   /*  the while loop needs to have an exit
1265      the 'main' thread can terminate all the threads by performing a broadcast
1266    and calling FuncFinish */
1267   while(PetscThreadGo) {
1268     /* need to check the condition to ensure we don't have to wait
1269        waiting when you don't have to causes problems
1270      also need to check the condition to ensure proper handling of spurious wakeups */
1271     while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
1272       /*upon entry, automically releases the lock and blocks
1273        upon return, has the lock */
1274         ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
1275 	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
1276 	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
1277     }
1278     if(ThreadId==0) {
1279       job_chain.startJob = PETSC_FALSE;
1280       job_chain.eJobStat = ThreadsWorking;
1281     }
1282     ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
1283     if(PeeOn==PETSC_FALSE) {
1284       /* tell your subworker it's time to get to work */
1285       ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
1286     }
1287     /* do your job */
1288     if(job_chain.pdata==NULL) {
1289       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
1290     }
1291     else {
1292       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
1293     }
1294     if(iterr!=0) {
1295       ithreaderr = 1;
1296     }
1297     if(PetscThreadGo) {
1298       /* reset job, get ready for more */
1299       if(PeeOn==PETSC_FALSE) {
1300         /* check your subordinate, wait for him to be ready
1301          how do you know for a fact that your subordinate has actually started? */
1302         ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1303         while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
1304           /* upon entry, automically releases the lock and blocks
1305            upon return, has the lock */
1306           ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1307         }
1308         ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1309         /* your subordinate is now ready */
1310       }
1311       ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1312       *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1313       if(ThreadId==0) {
1314 	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
1315         /* root thread (foreman) signals 'main' */
1316         ierr = pthread_cond_signal(&main_cond);
1317       }
1318       else {
1319         /* signal your boss before you go to sleep */
1320         ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1321       }
1322     }
1323   }
1324   return NULL;
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "PetscThreadInitialize_Chain"
1329 void* PetscThreadInitialize_Chain(PetscInt N) {
1330   PetscInt i,ierr;
1331   int status;
1332 
1333   if(PetscUseThreadPool) {
1334     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1335     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1336     arrmutex = (char*)memalign(Val1,Val2);
1337     arrcond1 = (char*)memalign(Val1,Val2);
1338     arrcond2 = (char*)memalign(Val1,Val2);
1339     arrstart = (char*)memalign(Val1,Val2);
1340     arrready = (char*)memalign(Val1,Val2);
1341     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1342     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1343     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1344     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1345     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1346     /* initialize job structure */
1347     for(i=0; i<PetscMaxThreads; i++) {
1348       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1349       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1350       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1351       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1352       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1353     }
1354     for(i=0; i<PetscMaxThreads; i++) {
1355       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1356       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1357       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1358       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1359       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1360     }
1361     job_chain.pfunc = NULL;
1362     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1363     job_chain.startJob = PETSC_FALSE;
1364     job_chain.eJobStat = JobInitiated;
1365     pVal = (int*)malloc(N*sizeof(int));
1366     /* allocate memory in the heap for the thread structure */
1367     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1368     /* create threads */
1369     for(i=0; i<N; i++) {
1370       pVal[i] = i;
1371       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1372       /* should check error */
1373     }
1374   }
1375   else {
1376   }
1377   return NULL;
1378 }
1379 
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "PetscThreadFinalize_Chain"
1383 PetscErrorCode PetscThreadFinalize_Chain() {
1384   int i,ierr;
1385   void* jstatus;
1386 
1387   PetscFunctionBegin;
1388 
1389   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1390   /* join the threads */
1391   for(i=0; i<PetscMaxThreads; i++) {
1392     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1393     /* should check error */
1394   }
1395   free(PetscThreadPoint);
1396   free(arrmutex);
1397   free(arrcond1);
1398   free(arrcond2);
1399   free(arrstart);
1400   free(arrready);
1401   free(job_chain.pdata);
1402   free(pVal);
1403 
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MainWait_Chain"
1409 void MainWait_Chain() {
1410   int ierr;
1411   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1412   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1413     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1414   }
1415   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MainJob_Chain"
1420 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1421   int i,ierr;
1422   PetscErrorCode ijoberr = 0;
1423 
1424   MainWait();
1425   job_chain.pfunc = pFunc;
1426   job_chain.pdata = data;
1427   job_chain.startJob = PETSC_TRUE;
1428   for(i=0; i<PetscMaxThreads; i++) {
1429     *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1430   }
1431   job_chain.eJobStat = JobInitiated;
1432   ierr = pthread_cond_signal(job_chain.cond2array[0]);
1433   if(pFunc!=FuncFinish) {
1434     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1435   }
1436 
1437   if(ithreaderr) {
1438     ijoberr = ithreaderr;
1439   }
1440   return ijoberr;
1441 }
1442 /****  ****/
1443 
1444 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1445 /**** True Thread Functions ****/
1446 void* PetscThreadFunc_True(void* arg) {
1447   int icorr,ierr,iVal;
1448   int* pId = (int*)arg;
1449   int ThreadId = *pId;
1450   PetscErrorCode iterr;
1451   cpu_set_t mset;
1452   //printf("Thread %d In True Pool Thread Function\n",ThreadId);
1453   icorr = ThreadCoreAffinity[ThreadId];
1454   CPU_ZERO(&mset);
1455   CPU_SET(icorr,&mset);
1456   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1457 
1458   ierr = pthread_mutex_lock(&job_true.mutex);
1459   job_true.iNumReadyThreads++;
1460   if(job_true.iNumReadyThreads==PetscMaxThreads) {
1461     ierr = pthread_cond_signal(&main_cond);
1462   }
1463   /*the while loop needs to have an exit
1464     the 'main' thread can terminate all the threads by performing a broadcast
1465    and calling FuncFinish */
1466   while(PetscThreadGo) {
1467     /*need to check the condition to ensure we don't have to wait
1468       waiting when you don't have to causes problems
1469      also need to wait if another thread sneaks in and messes with the predicate */
1470     while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
1471       /* upon entry, automically releases the lock and blocks
1472        upon return, has the lock */
1473       //printf("Thread %d Going to Sleep!\n",ThreadId);
1474       ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
1475     }
1476     job_true.startJob = PETSC_FALSE;
1477     job_true.iNumJobThreads--;
1478     job_true.iNumReadyThreads--;
1479     iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
1480     pthread_mutex_unlock(&job_true.mutex);
1481     if(job_true.pdata==NULL) {
1482       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
1483     }
1484     else {
1485       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
1486     }
1487     if(iterr!=0) {
1488       ithreaderr = 1;
1489     }
1490     //printf("Thread %d Finished Job\n",ThreadId);
1491     /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
1492       what happens if a thread finishes before they all start? BAD!
1493      what happens if a thread finishes before any else start? BAD! */
1494     pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
1495     /* reset job */
1496     if(PetscThreadGo) {
1497       pthread_mutex_lock(&job_true.mutex);
1498       job_true.iNumReadyThreads++;
1499       if(job_true.iNumReadyThreads==PetscMaxThreads) {
1500 	/* signal the 'main' thread that the job is done! (only done once) */
1501 	ierr = pthread_cond_signal(&main_cond);
1502       }
1503     }
1504   }
1505   return NULL;
1506 }
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "PetscThreadInitialize_True"
1510 void* PetscThreadInitialize_True(PetscInt N) {
1511   PetscInt i;
1512   int status;
1513 
1514   pVal = (int*)malloc(N*sizeof(int));
1515   /* allocate memory in the heap for the thread structure */
1516   PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1517   BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1518   job_true.pdata = (void**)malloc(N*sizeof(void*));
1519   for(i=0; i<N; i++) {
1520     pVal[i] = i;
1521     status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1522     /* error check to ensure proper thread creation */
1523     status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1524     /* should check error */
1525   }
1526   //printf("Finished True Thread Pool Initialization\n");
1527   return NULL;
1528 }
1529 
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "PetscThreadFinalize_True"
1533 PetscErrorCode PetscThreadFinalize_True() {
1534   int i,ierr;
1535   void* jstatus;
1536 
1537   PetscFunctionBegin;
1538 
1539   MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1540   /* join the threads */
1541   for(i=0; i<PetscMaxThreads; i++) {
1542     ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1543   }
1544   free(BarrPoint);
1545   free(PetscThreadPoint);
1546 
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MainWait_True"
1552 void MainWait_True() {
1553   int ierr;
1554   ierr = pthread_mutex_lock(&job_true.mutex);
1555   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1556     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1557   }
1558   ierr = pthread_mutex_unlock(&job_true.mutex);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MainJob_True"
1563 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1564   int ierr;
1565   PetscErrorCode ijoberr = 0;
1566 
1567   MainWait();
1568   job_true.pfunc = pFunc;
1569   job_true.pdata = data;
1570   job_true.pbarr = &BarrPoint[n];
1571   job_true.iNumJobThreads = n;
1572   job_true.startJob = PETSC_TRUE;
1573   ierr = pthread_cond_broadcast(&job_true.cond);
1574   if(pFunc!=FuncFinish) {
1575     MainWait(); /* why wait after? guarantees that job gets done */
1576   }
1577 
1578   if(ithreaderr) {
1579     ijoberr = ithreaderr;
1580   }
1581   return ijoberr;
1582 }
1583 /**** NO THREAD POOL FUNCTION ****/
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MainJob_Spawn"
1586 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1587   PetscErrorCode ijoberr = 0;
1588 
1589   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1590   PetscThreadPoint = apThread; /* point to same place */
1591   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1592   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1593   free(apThread);
1594 
1595   return ijoberr;
1596 }
1597 /****  ****/
1598 #endif
1599 
1600 void* FuncFinish(void* arg) {
1601   PetscThreadGo = PETSC_FALSE;
1602   return(0);
1603 }
1604 
1605 #endif
1606