xref: /petsc/src/sys/objects/init.c (revision c370d2836f03fe5908db7480771ef42d2b45d254)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #define _GNU_SOURCE
10 #include <sched.h>
11 #include <petscsys.h>        /*I  "petscsys.h"   I*/
12 #if defined(PETSC_USE_PTHREAD)
13 #include <pthread.h>
14 #endif
15 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
16 #include <sys/sysinfo.h>
17 #endif
18 #include <unistd.h>
19 #if defined(PETSC_HAVE_STDLIB_H)
20 #include <stdlib.h>
21 #endif
22 #if defined(PETSC_HAVE_MALLOC_H)
23 #include <malloc.h>
24 #endif
25 #if defined(PETSC_HAVE_VALGRIND)
26 #include <valgrind/valgrind.h>
27 #endif
28 
/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool    PetscBeganMPI         = PETSC_FALSE;  /* PETSC_TRUE iff PETSc itself called MPI_Init() */
PetscBool    PetscInitializeCalled = PETSC_FALSE;  /* set once PetscInitialize() has run */
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;  /* set once PetscFinalize() has run */
PetscBool    PetscUseThreadPool    = PETSC_FALSE;  /* enabled by the -use_thread_pool option (see below) */
PetscBool    PetscThreadGo         = PETSC_TRUE;   /* presumably cleared to tell pool threads to exit -- set elsewhere */
PetscMPIInt  PetscGlobalRank = -1;                 /* rank in MPI_COMM_WORLD; -1 until initialized */
PetscMPIInt  PetscGlobalSize = -1;                 /* size of MPI_COMM_WORLD; -1 until initialized */
41 
#if defined(PETSC_USE_PTHREAD_CLASSES)
PetscMPIInt  PetscMaxThreads = 2;  /* number of worker threads; overridden by -thread_max */
pthread_t*   PetscThreadPoint;     /* ids of the pool worker threads */
/* NOTE(review): defining PETSC_HAVE_PTHREAD_BARRIER unconditionally makes every
   guard below always true -- looks like a development override; confirm before removing */
#define PETSC_HAVE_PTHREAD_BARRIER
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode ithreaderr = 0;  /* presumably set nonzero by a worker on error -- written elsewhere */
int*         pVal;              /* per-thread integer values used by the pool implementations */

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
int* ThreadCoreAffinity;    /* core each thread is bound to; filled from the -thread<i> options */

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */

/* shared job state for the 'tree' thread pool (hand-off protocol implemented elsewhere) */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);   /* kernel the workers execute */
  void** pdata;            /* per-thread argument for pfunc */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* shared job state for the 'main' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* shared job state for the 'chain' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/* shared job state for the 'true' (pthread_barrier based) thread pool */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
/* raw storage backing the mutex/condition/flag arrays above; presumably
   allocated by the pool initialization routines, which live elsewhere */
char* arrmutex; /* used by 'chain','main','tree' thread pools */
char* arrcond1; /* used by 'chain','main','tree' thread pools */
char* arrcond2; /* used by 'chain','main','tree' thread pools */
char* arrstart; /* used by 'chain','main','tree' thread pools */
char* arrready; /* used by 'chain','main','tree' thread pools */

/* Function Pointers */
/* dispatch table selected at runtime by -use_thread_pool [true,tree,main,chain] */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/

/* helpers defined elsewhere in the thread-pool implementation */
void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif
147 
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
/* the MPI implementation lacks these datatypes, so PETSc provides its own definitions */
MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
MPI_Datatype   MPI_C_COMPLEX;
#endif
PetscScalar    PETSC_i;            /* the imaginary unit; presumably assigned during initialization elsewhere */
#else
PetscScalar    PETSC_i = 0.0;      /* real builds: kept so code referencing PETSC_i still links */
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;
#endif
/* composite MPI datatypes; presumably committed during initialization elsewhere */
MPI_Datatype   MPIU_2SCALAR = 0;
MPI_Datatype   MPIU_2INT = 0;

/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;
173 
/*
       Function that is called to display all error messages
*/
/* overridable printing hooks; defaults point at the standard PETSc implementations */
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
#if defined(PETSC_HAVE_MATLAB_ENGINE)
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
  This is needed to turn on/off cusp synchronization */
PetscBool   synchronizeCUSP = PETSC_FALSE;

/* ------------------------------------------------------------------------------*/
/*
   Optional file where all PETSc output from various prints is saved
*/
FILE *petsc_history = PETSC_NULL;  /* opened by PetscOpenHistoryFile() when -history is given */
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "PetscOpenHistoryFile"
196 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
197 {
198   PetscErrorCode ierr;
199   PetscMPIInt    rank,size;
200   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
201   char           version[256];
202 
203   PetscFunctionBegin;
204   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
205   if (!rank) {
206     char        arch[10];
207     int         err;
208     PetscViewer viewer;
209 
210     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
211     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
212     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
213     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
214     if (filename) {
215       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
216     } else {
217       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
218       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
219       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
220     }
221 
222     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
223     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
224     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
225     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
226     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
227     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
228     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
229     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
230     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
231     err = fflush(*fd);
232     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "PetscCloseHistoryFile"
239 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
240 {
241   PetscErrorCode ierr;
242   PetscMPIInt    rank;
243   char           date[64];
244   int            err;
245 
246   PetscFunctionBegin;
247   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
248   if (!rank) {
249     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
250     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
251     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
252     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
253     err = fflush(*fd);
254     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
255     err = fclose(*fd);
256     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 /* ------------------------------------------------------------------------------*/
262 
263 /*
264    This is ugly and probably belongs somewhere else, but I want to
265   be able to put a true MPI abort error handler with command line args.
266 
    This is so that MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits, thus providing no useful information
  in the debugger; hence we call abort() instead of MPI_Abort().
270 */

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/* MPI error handler: prints the MPI error code and calls abort() -- not
   MPI_Abort() -- so a debugger retains all stack frames.  No matching
   PetscFunctionReturnVoid(): abort() never returns. */
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();
}
280 
#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/* MPI error handler: prints the MPI error code and tries to attach the
   debugger; if attaching fails the job is terminated with MPI_Abort(). */
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}
294 
#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode  PetscEnd(void)
{
  PetscFunctionBegin;
  /* any error from PetscFinalize() is ignored since the process exits immediately */
  PetscFinalize();
  exit(0);
  return 0;  /* never reached; satisfies the non-void return type */
}
319 
PetscBool    PetscOptionsPublish = PETSC_FALSE;   /* set by the -options_gui option */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
extern PetscBool  petscsetmallocvisited;
static char       emacsmachinename[256];          /* machine name read from -on_error_emacs */

/* optional hooks a higher-level package installs via PetscSetHelpVersionFunctions() */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "PetscSetHelpVersionFunctions"
330 /*@C
331    PetscSetHelpVersionFunctions - Sets functions that print help and version information
332    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
333    This routine enables a "higher-level" package that uses PETSc to print its messages first.
334 
335    Input Parameter:
336 +  help - the help function (may be PETSC_NULL)
337 -  version - the version function (may be PETSC_NULL)
338 
339    Level: developer
340 
341    Concepts: package help message
342 
343 @*/
344 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
345 {
346   PetscFunctionBegin;
347   PetscExternalHelpFunction    = help;
348   PetscExternalVersionFunction = version;
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
354 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
355 {
356   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
357   MPI_Comm       comm = PETSC_COMM_WORLD;
358   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
359   PetscErrorCode ierr;
360   PetscReal      si;
361   int            i;
362   PetscMPIInt    rank;
363   char           version[256];
364 
365   PetscFunctionBegin;
366   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
367 
368   /*
369       Setup the memory management; support for tracing malloc() usage
370   */
371   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
372 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
373   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
374   if ((!flg2 || flg1) && !petscsetmallocvisited) {
375 #if defined(PETSC_HAVE_VALGRIND)
376     if (flg2 || !(RUNNING_ON_VALGRIND)) {
377       /* turn off default -malloc if valgrind is being used */
378 #endif
379       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
380 #if defined(PETSC_HAVE_VALGRIND)
381     }
382 #endif
383   }
384 #else
385   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
386   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
387   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
388 #endif
389   if (flg3) {
390     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
391   }
392   flg1 = PETSC_FALSE;
393   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
394   if (flg1) {
395     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
396     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
397   }
398 
399   flg1 = PETSC_FALSE;
400   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
401   if (!flg1) {
402     flg1 = PETSC_FALSE;
403     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
404   }
405   if (flg1) {
406     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
407   }
408 
409   /*
410       Set the display variable for graphics
411   */
412   ierr = PetscSetDisplay();CHKERRQ(ierr);
413 
414 
415   /*
416   else {
417     //need to define these in the case on 'no threads' or 'thread create/destroy'
418     //could take any of the above versions
419     PetscThreadInitialize = &PetscThreadInitialize_True;
420     PetscThreadFinalize   = &PetscThreadFinalize_True;
421     MainJob               = &MainJob_True;
422   }
423       Print the PETSc version information
424   */
425   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
426   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
427   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
428   if (flg1 || flg2 || flg3){
429 
430     /*
431        Print "higher-level" package version message
432     */
433     if (PetscExternalVersionFunction) {
434       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
435     }
436 
437     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
439 ------------------------------\n");CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
441     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
442     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
443     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
444     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
445     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
446     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
447 ------------------------------\n");CHKERRQ(ierr);
448   }
449 
450   /*
451        Print "higher-level" package help message
452   */
453   if (flg3){
454     if (PetscExternalHelpFunction) {
455       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
456     }
457   }
458 
459   /*
460       Setup the error handling
461   */
462   flg1 = PETSC_FALSE;
463   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
464   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
465   flg1 = PETSC_FALSE;
466   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
467   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
468   flg1 = PETSC_FALSE;
469   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
470   if (flg1) {
471     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
472   }
473   flg1 = PETSC_FALSE;
474   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
475   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
476   flg1 = PETSC_FALSE;
477   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
478   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
479 
480   /*
481       Setup debugger information
482   */
483   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
484   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
485   if (flg1) {
486     MPI_Errhandler err_handler;
487 
488     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
489     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
490     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
491     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
492   }
493   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
494   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
495   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
496   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
497   if (flg1 || flg2) {
498     PetscMPIInt    size;
499     PetscInt       lsize,*nodes;
500     MPI_Errhandler err_handler;
501     /*
502        we have to make sure that all processors have opened
503        connections to all other processors, otherwise once the
504        debugger has stated it is likely to receive a SIGUSR1
505        and kill the program.
506     */
507     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
508     if (size > 2) {
509       PetscMPIInt dummy = 0;
510       MPI_Status  status;
511       for (i=0; i<size; i++) {
512         if (rank != i) {
513           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
514         }
515       }
516       for (i=0; i<size; i++) {
517         if (rank != i) {
518           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
519         }
520       }
521     }
522     /* check if this processor node should be in debugger */
523     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
524     lsize = size;
525     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
526     if (flag) {
527       for (i=0; i<lsize; i++) {
528         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
529       }
530     }
531     if (!flag) {
532       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
533       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
534       if (flg1) {
535         ierr = PetscAttachDebugger();CHKERRQ(ierr);
536       } else {
537         ierr = PetscStopForDebugger();CHKERRQ(ierr);
538       }
539       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
540       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
541     }
542     ierr = PetscFree(nodes);CHKERRQ(ierr);
543   }
544 
545   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
546   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
547 
548 #if defined(PETSC_USE_SOCKET_VIEWER)
549   /*
550     Activates new sockets for zope if needed
551   */
552   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
553   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
554   if (flgz){
555     int  sockfd;
556     char hostname[256];
557     char username[256];
558     int  remoteport = 9999;
559 
560     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
561     if (!hostname[0]){
562       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
563     }
564     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
565     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
566     PETSC_ZOPEFD = fdopen(sockfd, "w");
567     if (flgzout){
568       PETSC_STDOUT = PETSC_ZOPEFD;
569       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
570       fprintf(PETSC_STDOUT, "<<<start>>>");
571     } else {
572       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
573       fprintf(PETSC_ZOPEFD, "<<<start>>>");
574     }
575   }
576 #endif
577 #if defined(PETSC_USE_SERVER)
578   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
579   if (flgz){
580     PetscInt port = PETSC_DECIDE;
581     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
582     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
583   }
584 #endif
585 
586   /*
587         Setup profiling and logging
588   */
589 #if defined (PETSC_USE_INFO)
590   {
591     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
592     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
593     if (flg1 && logname[0]) {
594       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
595     } else if (flg1) {
596       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
597     }
598   }
599 #endif
600 #if defined(PETSC_USE_LOG)
601   mname[0] = 0;
602   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
603   if (flg1) {
604     if (mname[0]) {
605       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
606     } else {
607       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
608     }
609   }
610 #if defined(PETSC_HAVE_MPE)
611   flg1 = PETSC_FALSE;
612   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
613   if (flg1) PetscLogMPEBegin();
614 #endif
615   flg1 = PETSC_FALSE;
616   flg2 = PETSC_FALSE;
617   flg3 = PETSC_FALSE;
618   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
619   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
620   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
621   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
622   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
623   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
624 
625   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
626   if (flg1) {
627     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
628     FILE *file;
629     if (mname[0]) {
630       sprintf(name,"%s.%d",mname,rank);
631       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
632       file = fopen(fname,"w");
633       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
634     } else {
635       file = PETSC_STDOUT;
636     }
637     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
638   }
639 #endif
640 
641   /*
642       Setup building of stack frames for all function calls
643   */
644 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
645   ierr = PetscStackCreate();CHKERRQ(ierr);
646 #endif
647 
648   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
649 
650 #if defined(PETSC_USE_PTHREAD_CLASSES)
651   /*
652       Determine whether user specified maximum number of threads
653    */
654   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
655 
656   /*
657       Determine whether to use thread pool
658    */
659   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
660   if (flg1) {
661     PetscUseThreadPool = PETSC_TRUE;
662     PetscInt N_CORES = get_nprocs();
663     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
664     char tstr[9];
665     char tbuf[2];
666     strcpy(tstr,"-thread");
667     for(i=0;i<PetscMaxThreads;i++) {
668       ThreadCoreAffinity[i] = i;
669       sprintf(tbuf,"%d",i);
670       strcat(tstr,tbuf);
671       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
672       if(flg1) {
673         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
674         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
675       }
676       tstr[7] = '\0';
677     }
678     /* get the thread pool type */
679     PetscInt ipool = 0;
680     const char *choices[4] = {"true","tree","main","chain"};
681 
682     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
683     switch(ipool) {
684     case 1:
685       PetscThreadFunc       = &PetscThreadFunc_Tree;
686       PetscThreadInitialize = &PetscThreadInitialize_Tree;
687       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
688       MainWait              = &MainWait_Tree;
689       MainJob               = &MainJob_Tree;
690       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
691       break;
692     case 2:
693       PetscThreadFunc       = &PetscThreadFunc_Main;
694       PetscThreadInitialize = &PetscThreadInitialize_Main;
695       PetscThreadFinalize   = &PetscThreadFinalize_Main;
696       MainWait              = &MainWait_Main;
697       MainJob               = &MainJob_Main;
698       PetscInfo(PETSC_NULL,"Using main thread pool\n");
699       break;
700 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
701     case 3:
702 #else
703     default:
704 #endif
705       PetscThreadFunc       = &PetscThreadFunc_Chain;
706       PetscThreadInitialize = &PetscThreadInitialize_Chain;
707       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
708       MainWait              = &MainWait_Chain;
709       MainJob               = &MainJob_Chain;
710       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
711       break;
712 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
713     default:
714       PetscThreadFunc       = &PetscThreadFunc_True;
715       PetscThreadInitialize = &PetscThreadInitialize_True;
716       PetscThreadFinalize   = &PetscThreadFinalize_True;
717       MainWait              = &MainWait_True;
718       MainJob               = &MainJob_True;
719       PetscInfo(PETSC_NULL,"Using true thread pool\n");
720       break;
721 #endif
722     }
723     PetscThreadInitialize(PetscMaxThreads);
724   }
725 #endif
726   /*
727        Print basic help message
728   */
729   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
730   if (flg1) {
731     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
732     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
733     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
734     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
735     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
736     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
737     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
738     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
739     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
741     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
742     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
764 #if defined(PETSC_USE_LOG)
765     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
768 #if defined(PETSC_HAVE_MPE)
769     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
770 #endif
771     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
772 #endif
773     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
777   }
778 
779   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
780   if (flg1) {
781     ierr = PetscSleep(si);CHKERRQ(ierr);
782   }
783 
784   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
785   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
786   if (f) {
787     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
788   }
789 
790 #if defined(PETSC_HAVE_CUSP)
791   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
792   if (flg3) flg1 = PETSC_TRUE;
793   else flg1 = PETSC_FALSE;
794   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
795   if (flg1) synchronizeCUSP = PETSC_TRUE;
796 #endif
797 
798   PetscFunctionReturn(0);
799 }
800 
801 #if defined(PETSC_USE_PTHREAD_CLASSES)
802 
803 /**** 'Tree' Thread Pool Functions ****/
/* Worker routine for the 'tree' thread pool.

   arg points at this worker's integer thread id.  The worker pins itself to
   the core given by ThreadCoreAffinity[ThreadId], then takes part in a
   Mary-ary (Mary = 2, i.e. binary) synchronization tree: thread t's
   subordinates are threads Mary*t+1 .. Mary*t+Mary.  A thread whose first
   subordinate index exceeds the pool ("PeeOn") is a leaf.  Thread 0 is the
   root and is the only worker that signals the 'main' thread via main_cond.

   Protocol per job: sleep on cond2array until woken, fan the wakeup down to
   subordinates, run job_tree.pfunc on this thread's slot of job_tree.pdata,
   wait for subordinates to report ready again, then report ready up the tree
   on cond1array.  The loop exits when the 'main' thread clears PetscThreadGo
   (done by broadcasting the FuncFinish job through MainJob).
   NOTE(review): the pthread return codes collected in ierr are never
   checked. */
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;
  cpu_set_t mset;
  /* pin this worker to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* leaf test: first subordinate index would fall outside the pool */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
             upon return, has the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit;
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait --
       waiting when you don't have to causes problems --
       and to ensure proper handling of spurious wakeups */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
        *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
        SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job; NULL pdata means a no-argument job, otherwise pass this
       thread's private slot of the data array */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1; /* record failure in the shared global error flag */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinates, waiting for them to be ready;
           the arrThreadStarted flag distinguishes "still ready from last
           time" from "finished this job and ready again" */
        for(i=1;i<=Mary;i++) {
          SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks;
                 upon return, has the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
        }
        /* your subordinates are now ready */
      }
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
923 
924 #undef __FUNCT__
925 #define __FUNCT__ "PetscThreadInitialize_Tree"
926 void* PetscThreadInitialize_Tree(PetscInt N) {
927   PetscInt i,ierr;
928   int status;
929 
930   if(PetscUseThreadPool) {
931     size_t Val1 = (size_t)CACHE_LINE_SIZE;
932     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
933     arrmutex = (char*)memalign(Val1,Val2);
934     arrcond1 = (char*)memalign(Val1,Val2);
935     arrcond2 = (char*)memalign(Val1,Val2);
936     arrstart = (char*)memalign(Val1,Val2);
937     arrready = (char*)memalign(Val1,Val2);
938     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
939     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
940     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
941     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
942     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
943     /* initialize job structure */
944     for(i=0; i<PetscMaxThreads; i++) {
945       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
946       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
947       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
948       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
949       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
950     }
951     for(i=0; i<PetscMaxThreads; i++) {
952       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
953       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
954       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
955       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
956       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
957     }
958     job_tree.pfunc = NULL;
959     job_tree.pdata = (void**)malloc(N*sizeof(void*));
960     job_tree.startJob = PETSC_FALSE;
961     job_tree.eJobStat = JobInitiated;
962     pVal = (int*)malloc(N*sizeof(int));
963     /* allocate memory in the heap for the thread structure */
964     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
965     /* create threads */
966     for(i=0; i<N; i++) {
967       pVal[i] = i;
968       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
969       /* should check status */
970     }
971   }
972   return NULL;
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "PetscThreadFinalize_Tree"
977 PetscErrorCode PetscThreadFinalize_Tree() {
978   int i,ierr;
979   void* jstatus;
980 
981   PetscFunctionBegin;
982 
983   if(PetscUseThreadPool) {
984     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
985     /* join the threads */
986     for(i=0; i<PetscMaxThreads; i++) {
987       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
988       /* do error checking*/
989     }
990     free(PetscThreadPoint);
991     free(arrmutex);
992     free(arrcond1);
993     free(arrcond2);
994     free(arrstart);
995     free(arrready);
996     free(job_tree.pdata);
997     free(pVal);
998   }
999   else {
1000   }
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MainWait_Tree"
1006 void MainWait_Tree() {
1007   int ierr;
1008   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1009   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1010     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1011   }
1012   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1013 }
1014 
#undef __FUNCT__
#define __FUNCT__ "MainJob_Tree"
/* Submits one job to the 'tree' pool from the 'main' thread.

   Waits for the previous job to finish, installs pFunc/data in job_tree,
   wakes the root worker (thread 0) -- which fans the wakeup down the tree --
   and, unless this is the FuncFinish termination job, waits again for
   completion.  With no pool, spawns n transient threads instead.  Returns
   nonzero if any worker's job returned nonzero (via the sticky global flag
   ithreaderr).
   NOTE(review): job_tree.pdata is overwritten here, so the array malloc'd in
   PetscThreadInitialize_Tree appears to be leaked after the first job, and
   PetscThreadFinalize_Tree then frees the caller's array instead -- verify
   ownership of pdata. */
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();
    job_tree.pfunc = pFunc;
    job_tree.pdata = data;
    job_tree.startJob = PETSC_TRUE;
    /* clear the started flags so the post-job wait can tell old "ready"
       states from new ones */
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_tree.eJobStat = JobInitiated;
    /* wake the root; it propagates the wakeup down the tree */
    ierr = pthread_cond_signal(job_tree.cond2array[0]);
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
    }
  }
  else {
    /* no pool: create n threads for this job only, then join them */
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1045 /****  ****/
1046 
1047 /**** 'Main' Thread Pool Functions ****/
/* Worker routine for the 'main' thread pool.

   arg points at this worker's integer thread id.  The worker pins itself to
   the core given by ThreadCoreAffinity[ThreadId].  Unlike the tree/chain
   pools, every worker talks directly to the 'main' thread: it marks itself
   ready and signals cond1array, sleeps on cond2array until given work, runs
   job_main.pfunc on its slot of job_main.pdata, and repeats.  The loop exits
   when 'main' clears PetscThreadGo (FuncFinish broadcast through MainJob).
   NOTE(review): the pthread return codes collected in ierr are never
   checked. */
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;
  /* pin this worker to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* the while loop needs to have an exit;
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait --
       waiting when you don't have to causes problems --
       and to ensure proper handling of spurious wakeups */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock.
         The ready flag is cleared by MainJob_Main before it signals us. */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* do your job; NULL pdata means a no-argument job, otherwise pass this
       thread's private slot of the data array */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1; /* record failure in the shared global error flag */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "PetscThreadInitialize_Main"
1102 void* PetscThreadInitialize_Main(PetscInt N) {
1103   PetscInt i,ierr;
1104   int status;
1105 
1106   if(PetscUseThreadPool) {
1107     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1108     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1109     arrmutex = (char*)memalign(Val1,Val2);
1110     arrcond1 = (char*)memalign(Val1,Val2);
1111     arrcond2 = (char*)memalign(Val1,Val2);
1112     arrstart = (char*)memalign(Val1,Val2);
1113     arrready = (char*)memalign(Val1,Val2);
1114     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1115     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1116     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1117     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1118     /* initialize job structure */
1119     for(i=0; i<PetscMaxThreads; i++) {
1120       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1121       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1122       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1123       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1124     }
1125     for(i=0; i<PetscMaxThreads; i++) {
1126       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1127       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1128       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1129       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1130     }
1131     job_main.pfunc = NULL;
1132     job_main.pdata = (void**)malloc(N*sizeof(void*));
1133     pVal = (int*)malloc(N*sizeof(int));
1134     /* allocate memory in the heap for the thread structure */
1135     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1136     /* create threads */
1137     for(i=0; i<N; i++) {
1138       pVal[i] = i;
1139       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1140       /* error check */
1141     }
1142   }
1143   else {
1144   }
1145   return NULL;
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "PetscThreadFinalize_Main"
1150 PetscErrorCode PetscThreadFinalize_Main() {
1151   int i,ierr;
1152   void* jstatus;
1153 
1154   PetscFunctionBegin;
1155 
1156   if(PetscUseThreadPool) {
1157     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1158     /* join the threads */
1159     for(i=0; i<PetscMaxThreads; i++) {
1160       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1161     }
1162     free(PetscThreadPoint);
1163     free(arrmutex);
1164     free(arrcond1);
1165     free(arrcond2);
1166     free(arrstart);
1167     free(arrready);
1168     free(job_main.pdata);
1169     free(pVal);
1170   }
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MainWait_Main"
1176 void MainWait_Main() {
1177   int i,ierr;
1178   for(i=0; i<PetscMaxThreads; i++) {
1179     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1180     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1181       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1182     }
1183     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1184   }
1185 }
1186 
#undef __FUNCT__
#define __FUNCT__ "MainJob_Main"
/* Submits one job to the 'main' pool from the 'main' thread.

   Waits until every worker is ready, installs pFunc/data in job_main, clears
   each worker's ready flag, then signals every worker directly (no tree or
   chain fan-out) and -- unless this is the FuncFinish termination job --
   waits again for completion.  With no pool, spawns n transient threads
   instead.  Returns nonzero if any worker's job returned nonzero (via the
   sticky global flag ithreaderr). */
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait(); /* you know everyone is waiting to be signalled! */
    job_main.pfunc = pFunc;
    job_main.pdata = data;
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
    }
    /* tell the threads to go to work */
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_cond_signal(job_main.cond2array[i]);
    }
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
    }
  }
  else {
    /* no pool: create n threads for this job only, then join them */
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1218 /****  ****/
1219 
1220 /**** Chain Thread Functions ****/
/* Worker routine for the 'chain' thread pool.

   arg points at this worker's integer thread id.  The worker pins itself to
   the core given by ThreadCoreAffinity[ThreadId].  The threads form a simple
   chain: thread t's single subordinate is thread t+1; the last thread
   ("PeeOn") has none.  Thread 0 is the foreman and is the only worker that
   signals the 'main' thread via main_cond.

   Protocol per job: sleep on cond2array until woken, pass the wakeup down
   the chain, run job_chain.pfunc on this thread's slot of job_chain.pdata,
   wait for the subordinate to be ready again, then report ready up the chain
   on cond1array.  The loop exits when 'main' clears PetscThreadGo
   (FuncFinish broadcast through MainJob).
   NOTE(review): the pthread return codes collected in ierr are never
   checked. */
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;
  cpu_set_t mset;
  /* pin this worker to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* the last thread in the chain has no subordinate */
  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready*/
  }
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit;
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait --
       waiting when you don't have to causes problems --
       and to ensure proper handling of spurious wakeups */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
        *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job; NULL pdata means a no-argument job, otherwise pass this
       thread's private slot of the data array */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1; /* record failure in the shared global error flag */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinate, wait for him to be ready;
           the arrThreadStarted flag distinguishes "still ready from last
           time" from "finished this job and ready again" */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
             upon return, has the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "PetscThreadInitialize_Chain"
1329 void* PetscThreadInitialize_Chain(PetscInt N) {
1330   PetscInt i,ierr;
1331   int status;
1332 
1333   if(PetscUseThreadPool) {
1334     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1335     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1336     arrmutex = (char*)memalign(Val1,Val2);
1337     arrcond1 = (char*)memalign(Val1,Val2);
1338     arrcond2 = (char*)memalign(Val1,Val2);
1339     arrstart = (char*)memalign(Val1,Val2);
1340     arrready = (char*)memalign(Val1,Val2);
1341     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1342     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1343     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1344     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1345     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1346     /* initialize job structure */
1347     for(i=0; i<PetscMaxThreads; i++) {
1348       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1349       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1350       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1351       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1352       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1353     }
1354     for(i=0; i<PetscMaxThreads; i++) {
1355       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1356       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1357       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1358       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1359       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1360     }
1361     job_chain.pfunc = NULL;
1362     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1363     job_chain.startJob = PETSC_FALSE;
1364     job_chain.eJobStat = JobInitiated;
1365     pVal = (int*)malloc(N*sizeof(int));
1366     /* allocate memory in the heap for the thread structure */
1367     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1368     /* create threads */
1369     for(i=0; i<N; i++) {
1370       pVal[i] = i;
1371       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1372       /* should check error */
1373     }
1374   }
1375   else {
1376   }
1377   return NULL;
1378 }
1379 
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "PetscThreadFinalize_Chain"
1383 PetscErrorCode PetscThreadFinalize_Chain() {
1384   int i,ierr;
1385   void* jstatus;
1386 
1387   PetscFunctionBegin;
1388 
1389   if(PetscUseThreadPool) {
1390     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1391     /* join the threads */
1392     for(i=0; i<PetscMaxThreads; i++) {
1393       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1394       /* should check error */
1395     }
1396     free(PetscThreadPoint);
1397     free(arrmutex);
1398     free(arrcond1);
1399     free(arrcond2);
1400     free(arrstart);
1401     free(arrready);
1402     free(job_chain.pdata);
1403     free(pVal);
1404   }
1405   else {
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MainWait_Chain"
1412 void MainWait_Chain() {
1413   int ierr;
1414   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1415   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1416     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1417   }
1418   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1419 }
1420 
#undef __FUNCT__
#define __FUNCT__ "MainJob_Chain"
/* Submits one job to the 'chain' pool from the 'main' thread.

   Waits for the previous job to finish, installs pFunc/data in job_chain,
   wakes the foreman (thread 0) -- which passes the wakeup down the chain --
   and, unless this is the FuncFinish termination job, waits again for
   completion.  With no pool, spawns n transient threads instead.  Returns
   nonzero if any worker's job returned nonzero (via the sticky global flag
   ithreaderr).
   NOTE(review): job_chain.pdata is overwritten here, so the array malloc'd
   in PetscThreadInitialize_Chain appears to be leaked after the first job --
   verify ownership of pdata. */
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();
    job_chain.pfunc = pFunc;
    job_chain.pdata = data;
    job_chain.startJob = PETSC_TRUE;
    /* clear the started flags so the post-job wait can tell old "ready"
       states from new ones */
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_chain.eJobStat = JobInitiated;
    /* wake the foreman; it passes the wakeup down the chain */
    ierr = pthread_cond_signal(job_chain.cond2array[0]);
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
    }
  }
  else {
    /* no pool: create n threads for this job only, then join them */
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1451 /****  ****/
1452 
1453 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1454 /**** True Thread Functions ****/
/* Worker routine for the 'true' thread pool (barrier based).

   arg points at this worker's integer thread id.  The worker pins itself to
   the core given by ThreadCoreAffinity[ThreadId].  All workers share one
   mutex/condition pair in job_true: each waits on job_true.cond until a job
   is broadcast, claims a data slot (iVal, assigned in wake-up order rather
   than by thread id), runs job_true.pfunc, and then meets the other job
   threads at the barrier job_true.pbarr before marking itself ready again.
   The loop exits when 'main' clears PetscThreadGo (FuncFinish broadcast
   through MainJob).
   NOTE(review): job_true.pdata and job_true.startJob are read after the
   mutex is released -- looks racy against the next MainJob_True; verify.
   The pthread return codes collected in ierr are never checked. */
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;
  /* pin this worker to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  /* last worker to come up tells 'main' that the pool is ready */
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    ierr = pthread_cond_signal(&main_cond);
  }
  /* the while loop needs to have an exit;
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait --
       waiting when you don't have to causes problems --
       also need to wait if another thread sneaks in and messes with the predicate */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks;
         upon return, has the lock */
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* claim a data slot in wake-up order (not by thread id) */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* do your job; NULL pdata means a no-argument job */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1; /* record failure in the shared global error flag */
    }
    /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads --
       what happens if a thread finishes before they all start? BAD!
       what happens if a thread finishes before any else start? BAD! */
    pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
    /* reset job */
    if(PetscThreadGo) {
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
        /* signal the 'main' thread that the job is done! (only done once) */
        ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "PetscThreadInitialize_True"
1517 void* PetscThreadInitialize_True(PetscInt N) {
1518   PetscInt i;
1519   int status;
1520 
1521   if(PetscUseThreadPool) {
1522     pVal = (int*)malloc(N*sizeof(int));
1523     /* allocate memory in the heap for the thread structure */
1524     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1525     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1526     job_true.pdata = (void**)malloc(N*sizeof(void*));
1527     for(i=0; i<N; i++) {
1528       pVal[i] = i;
1529       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1530       /* error check to ensure proper thread creation */
1531       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1532       /* should check error */
1533     }
1534   }
1535   else {
1536   }
1537   return NULL;
1538 }
1539 
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "PetscThreadFinalize_True"
1543 PetscErrorCode PetscThreadFinalize_True() {
1544   int i,ierr;
1545   void* jstatus;
1546 
1547   PetscFunctionBegin;
1548 
1549   if(PetscUseThreadPool) {
1550     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1551     /* join the threads */
1552     for(i=0; i<PetscMaxThreads; i++) {
1553       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1554       /* should check error */
1555     }
1556     free(BarrPoint);
1557     free(PetscThreadPoint);
1558   }
1559   else {
1560   }
1561   PetscFunctionReturn(0);
1562 }
1563 
#undef __FUNCT__
#define __FUNCT__ "MainWait_True"
/*
   MainWait_True - Blocks the 'main' thread until every pool worker is idle:
   all PetscMaxThreads workers counted ready and no job flagged as started.

   NOTE(review): pthread_cond_wait() requires job_true.mutex to be held on
   entry, yet this function never locks it and unlocks it before returning.
   The lock must therefore be acquired by the caller (or by the worker
   signaling path) before MainWait_True runs -- confirm against all call
   sites; as written the lock/unlock pairing is asymmetric.
*/
void MainWait_True() {
  int ierr;
  /* re-test the predicate after every wakeup to tolerate spurious wakeups */
  while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
    ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
  }
  /* pthread_cond_wait returns with the mutex held; release it for the caller */
  ierr = pthread_mutex_unlock(&job_true.mutex);
}
1573 
#undef __FUNCT__
#define __FUNCT__ "MainJob_True"
/*
   MainJob_True - Dispatches one job (pFunc applied to data) to n pool
   threads and, unless the job is the shutdown job FuncFinish, waits for it
   to complete.

   Input Parameters:
+  pFunc - routine each selected worker runs
.  data  - array of n per-thread argument pointers (may be NULL)
-  n     - number of threads to use; BarrPoint[n] is the barrier sized for n

   Returns 0, or the sticky worker-error flag ithreaderr if any worker has
   ever failed.

   NOTE(review): the job_true fields below are written with no visible lock
   held -- MainWait() unlocks job_true.mutex before returning -- so this
   relies on all workers being blocked in cond_wait at this point; confirm.
   NOTE(review): ithreaderr is never reset, so one failure taints every
   later call; the malloc in the non-pool branch is also unchecked.
*/
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
  int ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();
    job_true.pfunc = pFunc;
    job_true.pdata = data;
    job_true.pbarr = &BarrPoint[n];  /* barrier initialized for exactly n participants */
    job_true.iNumJobThreads = n;
    job_true.startJob = PETSC_TRUE;
    /* wake all workers; only n of them will claim the job */
    ierr = pthread_cond_broadcast(&job_true.cond);
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done */
    }
  }
  else {
    /* no pool: spawn n transient threads, run the job, and reap them */
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1602 /****  ****/
1603 #endif
1604 
1605 void* FuncFinish(void* arg) {
1606   PetscThreadGo = PETSC_FALSE;
1607   return(0);
1608 }
1609 
1610 #endif
1611