xref: /petsc/src/sys/objects/init.c (revision 683509dccb4a1d8b9935f30088fb0a925611f837)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #define _GNU_SOURCE
10 #include <sched.h>
11 #include <petscsys.h>        /*I  "petscsys.h"   I*/
12 #if defined(PETSC_USE_PTHREAD)
13 #include <pthread.h>
14 #endif
15 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
16 #include <sys/sysinfo.h>
17 #endif
18 #include <unistd.h>
19 #if defined(PETSC_HAVE_STDLIB_H)
20 #include <stdlib.h>
21 #endif
22 #if defined(PETSC_HAVE_MALLOC_H)
23 #include <malloc.h>
24 #endif
25 #if defined(PETSC_HAVE_VALGRIND)
26 #include <valgrind/valgrind.h>
27 #endif
28 
29 /* ------------------------Nasty global variables -------------------------------*/
30 /*
31      Indicates if PETSc started up MPI, or it was
32    already started before PETSc was initialized.
33 */
PetscBool    PetscBeganMPI         = PETSC_FALSE;  /* PETSC_TRUE only if PETSc itself started MPI (see comment above) */
PetscBool    PetscInitializeCalled = PETSC_FALSE;
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
PetscBool    PetscUseThreadPool    = PETSC_FALSE;  /* set when -use_thread_pool is given (see PetscOptionsCheckInitial_Private()) */
PetscBool    PetscThreadGo         = PETSC_TRUE;
PetscMPIInt  PetscGlobalRank = -1;  /* rank and size in PETSC_COMM_WORLD; -1 until set during initialization */
PetscMPIInt  PetscGlobalSize = -1;
41 
#if defined(PETSC_USE_PTHREAD_CLASSES)
/* Number of worker threads; overridable with -thread_max (see PetscOptionsCheckInitial_Private()) */
PetscMPIInt  PetscMaxThreads = 2;
pthread_t*   PetscThreadPoint;
/* NOTE(review): PETSC_HAVE_PTHREAD_BARRIER is defined unconditionally here, so
   every "#if defined(PETSC_HAVE_PTHREAD_BARRIER)" below is always true; this
   looks like a temporary override -- confirm it should not come from configure */
#define PETSC_HAVE_PTHREAD_BARRIER
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode ithreaderr = 0;
int*         pVal;

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
int* ThreadCoreAffinity;    /* core index per thread; filled in from the -thread<i> options */

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */

/* Shared job state for the 'tree' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);   /* job function the worker threads execute */
  void** pdata;            /* per-thread argument handed to pfunc */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* Shared job state for the 'main' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* Shared job state for the 'chain' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/* Shared job state for the 'true' (barrier-based) thread pool */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
/* raw storage for the per-thread synchronization objects; presumably laid out
   with CACHE_LINE_SIZE spacing by the pool initializers (defined elsewhere) */
char* arrmutex; /* used by 'chain','main','tree' thread pools */
char* arrcond1; /* used by 'chain','main','tree' thread pools */
char* arrcond2; /* used by 'chain','main','tree' thread pools */
char* arrstart; /* used by 'chain','main','tree' thread pools */
char* arrready; /* used by 'chain','main','tree' thread pools */

/* Function Pointers: bound to one of the _Tree/_Main/_Chain/_True variants
   below according to the -use_thread_pool option */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** used when no pool is active: spawn/join threads per job ****/
PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);

void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif
148 
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
/* fallbacks for MPI implementations lacking the C99 complex datatypes;
   presumably committed during PetscInitialize() -- not visible in this file */
MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
MPI_Datatype   MPI_C_COMPLEX;
#endif
PetscScalar    PETSC_i;
#else
PetscScalar    PETSC_i = 0.0;  /* imaginary unit; identically zero in real builds */
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;
#endif
/* composite datatypes (pairs of scalars/ints) used in PETSc reductions */
MPI_Datatype   MPIU_2SCALAR = 0;
MPI_Datatype   MPIU_2INT = 0;
166 
/*
     These globals are needed by the bit-array macros in petscbt.h
*/
#include <petscbt.h>
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;

/*
       Function pointers through which all error and help messages are
   displayed; replaceable so output can be redirected (e.g. into MATLAB)
*/
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
#if defined(PETSC_HAVE_MATLAB_ENGINE)
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
  This is needed to turn on/off cusp synchronization */
PetscBool   synchronizeCUSP = PETSC_FALSE;

/* ------------------------------------------------------------------------------*/
/*
   Optional file where all PETSc output from various prints is saved;
   opened by PetscOpenHistoryFile() when the -history option is given
*/
FILE *petsc_history = PETSC_NULL;
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "PetscOpenHistoryFile"
197 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
198 {
199   PetscErrorCode ierr;
200   PetscMPIInt    rank,size;
201   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
202   char           version[256];
203 
204   PetscFunctionBegin;
205   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
206   if (!rank) {
207     char        arch[10];
208     int         err;
209     PetscViewer viewer;
210 
211     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
212     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
213     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
214     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
215     if (filename) {
216       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
217     } else {
218       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
219       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
220       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
221     }
222 
223     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
224     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
225     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
226     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
227     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
228     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
229     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
230     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
231     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
232     err = fflush(*fd);
233     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "PetscCloseHistoryFile"
240 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
241 {
242   PetscErrorCode ierr;
243   PetscMPIInt    rank;
244   char           date[64];
245   int            err;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
249   if (!rank) {
250     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
251     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
252     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
253     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
254     err = fflush(*fd);
255     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
256     err = fclose(*fd);
257     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 /* ------------------------------------------------------------------------------*/
263 
264 /*
265    This is ugly and probably belongs somewhere else, but I want to
266   be able to put a true MPI abort error handler with command line args.
267 
268     This is so MPI errors in the debugger will leave all the stack
269   frames. The default MP_Abort() cleans up and exits thus providing no useful information
270   in the debugger hence we call abort() instead of MPI_Abort().
271 */
272 
#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/*
   MPI error handler (installed via MPI_Errhandler_create() in
   PetscOptionsCheckInitial_Private()) that prints the MPI error code and then
   calls abort() -- deliberately not MPI_Abort(), so a debugger keeps all stack
   frames (see the comment above).
*/
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();  /* never returns */
}
281 
#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/*
   MPI error handler (installed for -on_error_attach_debugger) that prints the
   MPI error code and attempts to attach the debugger; if that fails it falls
   back to MPI_Abort() with the original error code.
*/
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}
295 
#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode  PetscEnd(void)
{
  PetscFunctionBegin;
  PetscFinalize();
  exit(0);   /* terminates the process here */
  return 0;  /* unreachable; silences missing-return warnings */
}
320 
PetscBool    PetscOptionsPublish = PETSC_FALSE;  /* set from the -options_gui option */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
extern PetscBool  petscsetmallocvisited;
static char       emacsmachinename[256];  /* machine name read from -on_error_emacs */

/* optional callbacks of a higher-level package, registered through
   PetscSetHelpVersionFunctions(), invoked before PETSc's own version/help output */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "PetscSetHelpVersionFunctions"
331 /*@C
332    PetscSetHelpVersionFunctions - Sets functions that print help and version information
333    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
334    This routine enables a "higher-level" package that uses PETSc to print its messages first.
335 
336    Input Parameter:
337 +  help - the help function (may be PETSC_NULL)
338 -  version - the version function (may be PETSC_NULL)
339 
340    Level: developer
341 
342    Concepts: package help message
343 
344 @*/
345 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
346 {
347   PetscFunctionBegin;
348   PetscExternalHelpFunction    = help;
349   PetscExternalVersionFunction = version;
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
355 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
356 {
357   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
358   MPI_Comm       comm = PETSC_COMM_WORLD;
359   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
360   PetscErrorCode ierr;
361   PetscReal      si;
362   int            i;
363   PetscMPIInt    rank;
364   char           version[256];
365 
366   PetscFunctionBegin;
367   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
368 
369   /*
370       Setup the memory management; support for tracing malloc() usage
371   */
372   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
373 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
374   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
375   if ((!flg2 || flg1) && !petscsetmallocvisited) {
376 #if defined(PETSC_HAVE_VALGRIND)
377     if (flg2 || !(RUNNING_ON_VALGRIND)) {
378       /* turn off default -malloc if valgrind is being used */
379 #endif
380       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
381 #if defined(PETSC_HAVE_VALGRIND)
382     }
383 #endif
384   }
385 #else
386   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
387   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
388   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
389 #endif
390   if (flg3) {
391     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
392   }
393   flg1 = PETSC_FALSE;
394   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
395   if (flg1) {
396     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
397     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
398   }
399 
400   flg1 = PETSC_FALSE;
401   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
402   if (!flg1) {
403     flg1 = PETSC_FALSE;
404     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
405   }
406   if (flg1) {
407     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
408   }
409 
410   /*
411       Set the display variable for graphics
412   */
413   ierr = PetscSetDisplay();CHKERRQ(ierr);
414 
415 
416   /*
417       Print the PETSc version information
418   */
419   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
420   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
421   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
422   if (flg1 || flg2 || flg3){
423 
424     /*
425        Print "higher-level" package version message
426     */
427     if (PetscExternalVersionFunction) {
428       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
429     }
430 
431     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
432     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
433 ------------------------------\n");CHKERRQ(ierr);
434     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
435     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
436     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
437     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
441 ------------------------------\n");CHKERRQ(ierr);
442   }
443 
444   /*
445        Print "higher-level" package help message
446   */
447   if (flg3){
448     if (PetscExternalHelpFunction) {
449       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
450     }
451   }
452 
453   /*
454       Setup the error handling
455   */
456   flg1 = PETSC_FALSE;
457   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
458   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
459   flg1 = PETSC_FALSE;
460   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
461   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
462   flg1 = PETSC_FALSE;
463   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
464   if (flg1) {
465     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
466   }
467   flg1 = PETSC_FALSE;
468   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
469   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
470   flg1 = PETSC_FALSE;
471   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
472   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
473 
474   /*
475       Setup debugger information
476   */
477   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
478   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
479   if (flg1) {
480     MPI_Errhandler err_handler;
481 
482     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
483     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
484     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
485     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
486   }
487   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
488   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
489   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
490   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
491   if (flg1 || flg2) {
492     PetscMPIInt    size;
493     PetscInt       lsize,*nodes;
494     MPI_Errhandler err_handler;
495     /*
496        we have to make sure that all processors have opened
497        connections to all other processors, otherwise once the
498        debugger has stated it is likely to receive a SIGUSR1
499        and kill the program.
500     */
501     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
502     if (size > 2) {
503       PetscMPIInt dummy = 0;
504       MPI_Status  status;
505       for (i=0; i<size; i++) {
506         if (rank != i) {
507           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
508         }
509       }
510       for (i=0; i<size; i++) {
511         if (rank != i) {
512           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
513         }
514       }
515     }
516     /* check if this processor node should be in debugger */
517     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
518     lsize = size;
519     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
520     if (flag) {
521       for (i=0; i<lsize; i++) {
522         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
523       }
524     }
525     if (!flag) {
526       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
527       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
528       if (flg1) {
529         ierr = PetscAttachDebugger();CHKERRQ(ierr);
530       } else {
531         ierr = PetscStopForDebugger();CHKERRQ(ierr);
532       }
533       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
534       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
535     }
536     ierr = PetscFree(nodes);CHKERRQ(ierr);
537   }
538 
539   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
540   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
541 
542 #if defined(PETSC_USE_SOCKET_VIEWER)
543   /*
544     Activates new sockets for zope if needed
545   */
546   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
547   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
548   if (flgz){
549     int  sockfd;
550     char hostname[256];
551     char username[256];
552     int  remoteport = 9999;
553 
554     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
555     if (!hostname[0]){
556       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
557     }
558     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
559     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
560     PETSC_ZOPEFD = fdopen(sockfd, "w");
561     if (flgzout){
562       PETSC_STDOUT = PETSC_ZOPEFD;
563       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
564       fprintf(PETSC_STDOUT, "<<<start>>>");
565     } else {
566       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
567       fprintf(PETSC_ZOPEFD, "<<<start>>>");
568     }
569   }
570 #endif
571 #if defined(PETSC_USE_SERVER)
572   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
573   if (flgz){
574     PetscInt port = PETSC_DECIDE;
575     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
576     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
577   }
578 #endif
579 
580   /*
581         Setup profiling and logging
582   */
583 #if defined (PETSC_USE_INFO)
584   {
585     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
586     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
587     if (flg1 && logname[0]) {
588       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
589     } else if (flg1) {
590       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
591     }
592   }
593 #endif
594 #if defined(PETSC_USE_LOG)
595   mname[0] = 0;
596   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
597   if (flg1) {
598     if (mname[0]) {
599       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
600     } else {
601       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
602     }
603   }
604 #if defined(PETSC_HAVE_MPE)
605   flg1 = PETSC_FALSE;
606   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
607   if (flg1) PetscLogMPEBegin();
608 #endif
609   flg1 = PETSC_FALSE;
610   flg2 = PETSC_FALSE;
611   flg3 = PETSC_FALSE;
612   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
613   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
614   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
615   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
616   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
617   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
618 
619   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
620   if (flg1) {
621     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
622     FILE *file;
623     if (mname[0]) {
624       sprintf(name,"%s.%d",mname,rank);
625       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
626       file = fopen(fname,"w");
627       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
628     } else {
629       file = PETSC_STDOUT;
630     }
631     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
632   }
633 #endif
634 
635   /*
636       Setup building of stack frames for all function calls
637   */
638 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
639   ierr = PetscStackCreate();CHKERRQ(ierr);
640 #endif
641 
642   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
643 
644 #if defined(PETSC_USE_PTHREAD_CLASSES)
645   /*
646       Determine whether user specified maximum number of threads
647    */
648   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
649 
650   /*
651       Determine whether to use thread pool
652    */
653   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
654   if (flg1) {
655     PetscUseThreadPool = PETSC_TRUE;
656     PetscInt N_CORES = get_nprocs();
657     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
658     char tstr[9];
659     char tbuf[2];
660     strcpy(tstr,"-thread");
661     for(i=0;i<PetscMaxThreads;i++) {
662       ThreadCoreAffinity[i] = i;
663       sprintf(tbuf,"%d",i);
664       strcat(tstr,tbuf);
665       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
666       if(flg1) {
667         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
668         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
669       }
670       tstr[7] = '\0';
671     }
672     /* get the thread pool type */
673     PetscInt ipool = 0;
674     const char *choices[4] = {"true","tree","main","chain"};
675 
676     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
677     switch(ipool) {
678     case 1:
679       PetscThreadFunc       = &PetscThreadFunc_Tree;
680       PetscThreadInitialize = &PetscThreadInitialize_Tree;
681       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
682       MainWait              = &MainWait_Tree;
683       MainJob               = &MainJob_Tree;
684       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
685       break;
686     case 2:
687       PetscThreadFunc       = &PetscThreadFunc_Main;
688       PetscThreadInitialize = &PetscThreadInitialize_Main;
689       PetscThreadFinalize   = &PetscThreadFinalize_Main;
690       MainWait              = &MainWait_Main;
691       MainJob               = &MainJob_Main;
692       PetscInfo(PETSC_NULL,"Using main thread pool\n");
693       break;
694 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
695     case 3:
696 #else
697     default:
698 #endif
699       PetscThreadFunc       = &PetscThreadFunc_Chain;
700       PetscThreadInitialize = &PetscThreadInitialize_Chain;
701       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
702       MainWait              = &MainWait_Chain;
703       MainJob               = &MainJob_Chain;
704       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
705       break;
706 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
707     default:
708       PetscThreadFunc       = &PetscThreadFunc_True;
709       PetscThreadInitialize = &PetscThreadInitialize_True;
710       PetscThreadFinalize   = &PetscThreadFinalize_True;
711       MainWait              = &MainWait_True;
712       MainJob               = &MainJob_True;
713       PetscInfo(PETSC_NULL,"Using true thread pool\n");
714       break;
715 #endif
716     }
717     PetscThreadInitialize(PetscMaxThreads);
718   } else {
719     //need to define these in the case on 'no threads' or 'thread create/destroy'
720     //could take any of the above versions
721     MainJob               = &MainJob_Spawn;
722   }
723 #endif
724   /*
725        Print basic help message
726   */
727   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
728   if (flg1) {
729     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
730     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
731     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
732     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
733     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
734     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
735     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
736     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
737     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
738     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
739     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
741     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
742     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
762 #if defined(PETSC_USE_LOG)
763     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
766 #if defined(PETSC_HAVE_MPE)
767     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
768 #endif
769     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
770 #endif
771     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
775   }
776 
777   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
778   if (flg1) {
779     ierr = PetscSleep(si);CHKERRQ(ierr);
780   }
781 
782   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
783   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
784   if (f) {
785     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
786   }
787 
788 #if defined(PETSC_HAVE_CUSP)
789   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
790   if (flg3) flg1 = PETSC_TRUE;
791   else flg1 = PETSC_FALSE;
792   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
793   if (flg1) synchronizeCUSP = PETSC_TRUE;
794 #endif
795 
796   PetscFunctionReturn(0);
797 }
798 
799 #if defined(PETSC_USE_PTHREAD_CLASSES)
800 
801 /**** 'Tree' Thread Pool Functions ****/
/*
   Worker body for the 'tree' thread pool.  Workers form a binary tree:
   thread t supervises threads 2t+1 and 2t+2 (Mary = 2 children); threads
   with no subordinates ("PeeOn") are leaves.  Each worker pins itself to
   the core ThreadCoreAffinity[ThreadId], reports ready up the tree, then
   loops: sleep on its cond2array entry until woken, run job_tree.pfunc,
   and report completion up the tree.  Thread 0 (the root) is the one that
   signals 'main' through main_cond.  Runs until PetscThreadGo is cleared
   (done by broadcasting the FuncFinish job).
*/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;                       /* thread index assigned at creation */
  int ThreadId = *pId,Mary = 2,i,SubWorker;   /* Mary = branching factor of the tree */
  PetscBool PeeOn;                            /* PETSC_TRUE for leaf threads (no subordinates) */
  cpu_set_t mset;
  //printf("Thread %d In Tree Thread Function\n",ThreadId);
  /* pin this thread to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* a thread is a leaf when its first child index exceeds the pool size */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
           upon return, holds the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit:
     the 'main' thread terminates all the threads by broadcasting
     the FuncFinish job, which clears PetscThreadGo */
  while(PetscThreadGo) {
    /* check the predicate to avoid waiting needlessly and to handle
       spurious wakeups correctly; mutexarray[ThreadId] is still held here
       (locked above on the first pass, re-locked at the bottom of the loop) */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, holds the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
        *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
        SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job; pdata==NULL means a data-less job (e.g. FuncFinish) */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure in the global flag; MainJob reports it */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* wait for subordinates to be ready AND started; arrThreadStarted
           guards against treating a subordinate that never woke for this
           job as already finished */
        for(i=1;i<=Mary;i++) {
          SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks;
               upon return, holds the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
        }
        /* your subordinates are now ready */
      }
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "PetscThreadInitialize_Tree"
924 void* PetscThreadInitialize_Tree(PetscInt N) {
925   PetscInt i,ierr;
926   int status;
927 
928   if(PetscUseThreadPool) {
929     size_t Val1 = (size_t)CACHE_LINE_SIZE;
930     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
931     arrmutex = (char*)memalign(Val1,Val2);
932     arrcond1 = (char*)memalign(Val1,Val2);
933     arrcond2 = (char*)memalign(Val1,Val2);
934     arrstart = (char*)memalign(Val1,Val2);
935     arrready = (char*)memalign(Val1,Val2);
936     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
937     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
938     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
939     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
940     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
941     /* initialize job structure */
942     for(i=0; i<PetscMaxThreads; i++) {
943       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
944       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
945       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
946       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
947       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
948     }
949     for(i=0; i<PetscMaxThreads; i++) {
950       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
951       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
952       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
953       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
954       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
955     }
956     job_tree.pfunc = NULL;
957     job_tree.pdata = (void**)malloc(N*sizeof(void*));
958     job_tree.startJob = PETSC_FALSE;
959     job_tree.eJobStat = JobInitiated;
960     pVal = (int*)malloc(N*sizeof(int));
961     /* allocate memory in the heap for the thread structure */
962     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
963     /* create threads */
964     for(i=0; i<N; i++) {
965       pVal[i] = i;
966       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
967       /* should check status */
968     }
969   }
970   return NULL;
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "PetscThreadFinalize_Tree"
975 PetscErrorCode PetscThreadFinalize_Tree() {
976   int i,ierr;
977   void* jstatus;
978 
979   PetscFunctionBegin;
980 
981   if(PetscUseThreadPool) {
982     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
983     /* join the threads */
984     for(i=0; i<PetscMaxThreads; i++) {
985       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
986       /* do error checking*/
987     }
988     free(PetscThreadPoint);
989     free(arrmutex);
990     free(arrcond1);
991     free(arrcond2);
992     free(arrstart);
993     free(arrready);
994     free(job_tree.pdata);
995     free(pVal);
996   }
997   else {
998   }
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MainWait_Tree"
1004 void MainWait_Tree() {
1005   int ierr;
1006   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1007   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1008     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1009   }
1010   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MainJob_Tree"
1015 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1016   int i,ierr;
1017   PetscErrorCode ijoberr = 0;
1018   if(PetscUseThreadPool) {
1019     MainWait();
1020     job_tree.pfunc = pFunc;
1021     job_tree.pdata = data;
1022     job_tree.startJob = PETSC_TRUE;
1023     for(i=0; i<PetscMaxThreads; i++) {
1024       *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1025     }
1026     job_tree.eJobStat = JobInitiated;
1027     ierr = pthread_cond_signal(job_tree.cond2array[0]);
1028     if(pFunc!=FuncFinish) {
1029       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1030     }
1031   }
1032   else {
1033     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1034     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1035     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1036     free(apThread);
1037   }
1038   if(ithreaderr) {
1039     ijoberr = ithreaderr;
1040   }
1041   return ijoberr;
1042 }
1043 /****  ****/
1044 
1045 /**** 'Main' Thread Pool Functions ****/
/*
   Worker body for the 'main' thread pool: a flat pool where every worker
   talks directly to the 'main' thread (no hierarchy).  Each worker pins
   itself to ThreadCoreAffinity[ThreadId], flags itself ready, then loops:
   sleep on cond2array until the boss posts work, run job_main.pfunc,
   re-flag ready and signal the boss on cond1array.  Runs until
   PetscThreadGo is cleared by the FuncFinish job.
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;     /* thread index assigned at creation */
  int ThreadId = *pId;
  cpu_set_t mset;
  //printf("Thread %d In Main Thread Function\n",ThreadId);
  /* pin this thread to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* the while loop needs to have an exit:
     the 'main' thread terminates all the threads by broadcasting
     the FuncFinish job, which clears PetscThreadGo */
  while(PetscThreadGo) {
    /* check the predicate to avoid waiting needlessly and to handle
       spurious wakeups; the boss (MainJob_Main) clears arrThreadReady
       before signalling cond2array, which is what lets this loop exit */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
        /* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* do your job; pdata==NULL means a data-less job (e.g. FuncFinish) */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure in the global flag; MainJob reports it */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1097 
1098 #undef __FUNCT__
1099 #define __FUNCT__ "PetscThreadInitialize_Main"
1100 void* PetscThreadInitialize_Main(PetscInt N) {
1101   PetscInt i,ierr;
1102   int status;
1103 
1104   if(PetscUseThreadPool) {
1105     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1106     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1107     arrmutex = (char*)memalign(Val1,Val2);
1108     arrcond1 = (char*)memalign(Val1,Val2);
1109     arrcond2 = (char*)memalign(Val1,Val2);
1110     arrstart = (char*)memalign(Val1,Val2);
1111     arrready = (char*)memalign(Val1,Val2);
1112     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1113     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1114     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1115     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1116     /* initialize job structure */
1117     for(i=0; i<PetscMaxThreads; i++) {
1118       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1119       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1120       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1121       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1122     }
1123     for(i=0; i<PetscMaxThreads; i++) {
1124       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1125       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1126       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1127       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1128     }
1129     job_main.pfunc = NULL;
1130     job_main.pdata = (void**)malloc(N*sizeof(void*));
1131     pVal = (int*)malloc(N*sizeof(int));
1132     /* allocate memory in the heap for the thread structure */
1133     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1134     /* create threads */
1135     for(i=0; i<N; i++) {
1136       pVal[i] = i;
1137       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1138       /* error check */
1139     }
1140   }
1141   else {
1142   }
1143   return NULL;
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "PetscThreadFinalize_Main"
1148 PetscErrorCode PetscThreadFinalize_Main() {
1149   int i,ierr;
1150   void* jstatus;
1151 
1152   PetscFunctionBegin;
1153 
1154   if(PetscUseThreadPool) {
1155     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1156     /* join the threads */
1157     for(i=0; i<PetscMaxThreads; i++) {
1158       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1159     }
1160     free(PetscThreadPoint);
1161     free(arrmutex);
1162     free(arrcond1);
1163     free(arrcond2);
1164     free(arrstart);
1165     free(arrready);
1166     free(job_main.pdata);
1167     free(pVal);
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MainWait_Main"
1174 void MainWait_Main() {
1175   int i,ierr;
1176   for(i=0; i<PetscMaxThreads; i++) {
1177     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1178     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1179       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1180     }
1181     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1182   }
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MainJob_Main"
1187 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1188   int i,ierr;
1189   PetscErrorCode ijoberr = 0;
1190   if(PetscUseThreadPool) {
1191     MainWait(); /* you know everyone is waiting to be signalled! */
1192     job_main.pfunc = pFunc;
1193     job_main.pdata = data;
1194     for(i=0; i<PetscMaxThreads; i++) {
1195       *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1196     }
1197     /* tell the threads to go to work */
1198     for(i=0; i<PetscMaxThreads; i++) {
1199       ierr = pthread_cond_signal(job_main.cond2array[i]);
1200     }
1201     if(pFunc!=FuncFinish) {
1202       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1203     }
1204   }
1205   else {
1206     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1207     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1208     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1209     free(apThread);
1210   }
1211   if(ithreaderr) {
1212     ijoberr = ithreaderr;
1213   }
1214   return ijoberr;
1215 }
1216 /****  ****/
1217 
1218 /**** Chain Thread Functions ****/
/*
   Worker body for the 'chain' thread pool: thread t supervises exactly one
   subordinate, thread t+1; the last thread (PetscMaxThreads-1) is the end
   of the chain ("PeeOn") and has none.  Each worker pins itself to
   ThreadCoreAffinity[ThreadId], reports ready up the chain, then loops:
   sleep on cond2array until woken, wake its subordinate, run
   job_chain.pfunc, and report completion back up the chain.  Thread 0
   signals 'main' through main_cond.  Runs until PetscThreadGo is cleared.
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;           /* thread index assigned at creation */
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;   /* this thread's single subordinate */
  PetscBool PeeOn;                /* PETSC_TRUE for the last thread in the chain */
  cpu_set_t mset;
  //printf("Thread %d In Chain Thread Function\n",ThreadId);
  /* pin this thread to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, holds the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready*/
  }
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /*  the while loop needs to have an exit:
     the 'main' thread terminates all the threads by broadcasting
     the FuncFinish job, which clears PetscThreadGo */
  while(PetscThreadGo) {
    /* check the predicate to avoid waiting needlessly and to handle
       spurious wakeups correctly; mutexarray[ThreadId] is still held here
       (locked above on the first pass, re-locked at the bottom of the loop) */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, holds the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
        *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job; pdata==NULL means a data-less job (e.g. FuncFinish) */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure in the global flag; MainJob reports it */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* wait for the subordinate to be ready AND started; arrThreadStarted
           guards against treating a subordinate that never woke for this
           job as already finished */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
           upon return, holds the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "PetscThreadInitialize_Chain"
1327 void* PetscThreadInitialize_Chain(PetscInt N) {
1328   PetscInt i,ierr;
1329   int status;
1330 
1331   if(PetscUseThreadPool) {
1332     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1333     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1334     arrmutex = (char*)memalign(Val1,Val2);
1335     arrcond1 = (char*)memalign(Val1,Val2);
1336     arrcond2 = (char*)memalign(Val1,Val2);
1337     arrstart = (char*)memalign(Val1,Val2);
1338     arrready = (char*)memalign(Val1,Val2);
1339     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1340     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1341     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1342     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1343     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1344     /* initialize job structure */
1345     for(i=0; i<PetscMaxThreads; i++) {
1346       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1347       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1348       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1349       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1350       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1351     }
1352     for(i=0; i<PetscMaxThreads; i++) {
1353       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1354       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1355       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1356       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1357       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1358     }
1359     job_chain.pfunc = NULL;
1360     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1361     job_chain.startJob = PETSC_FALSE;
1362     job_chain.eJobStat = JobInitiated;
1363     pVal = (int*)malloc(N*sizeof(int));
1364     /* allocate memory in the heap for the thread structure */
1365     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1366     /* create threads */
1367     for(i=0; i<N; i++) {
1368       pVal[i] = i;
1369       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1370       /* should check error */
1371     }
1372   }
1373   else {
1374   }
1375   return NULL;
1376 }
1377 
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "PetscThreadFinalize_Chain"
1381 PetscErrorCode PetscThreadFinalize_Chain() {
1382   int i,ierr;
1383   void* jstatus;
1384 
1385   PetscFunctionBegin;
1386 
1387   if(PetscUseThreadPool) {
1388     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1389     /* join the threads */
1390     for(i=0; i<PetscMaxThreads; i++) {
1391       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1392       /* should check error */
1393     }
1394     free(PetscThreadPoint);
1395     free(arrmutex);
1396     free(arrcond1);
1397     free(arrcond2);
1398     free(arrstart);
1399     free(arrready);
1400     free(job_chain.pdata);
1401     free(pVal);
1402   }
1403   else {
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MainWait_Chain"
1410 void MainWait_Chain() {
1411   int ierr;
1412   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1413   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1414     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1415   }
1416   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MainJob_Chain"
1421 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1422   int i,ierr;
1423   PetscErrorCode ijoberr = 0;
1424   if(PetscUseThreadPool) {
1425     MainWait();
1426     job_chain.pfunc = pFunc;
1427     job_chain.pdata = data;
1428     job_chain.startJob = PETSC_TRUE;
1429     for(i=0; i<PetscMaxThreads; i++) {
1430       *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1431     }
1432     job_chain.eJobStat = JobInitiated;
1433     ierr = pthread_cond_signal(job_chain.cond2array[0]);
1434     if(pFunc!=FuncFinish) {
1435       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1436     }
1437   }
1438   else {
1439     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1440     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1441     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1442     free(apThread);
1443   }
1444   if(ithreaderr) {
1445     ijoberr = ithreaderr;
1446   }
1447   return ijoberr;
1448 }
1449 /****  ****/
1450 
1451 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1452 /**** True Thread Functions ****/
/*
   Worker body for the 'true' thread pool: a single shared mutex/condition
   pair (job_true.mutex/cond) protects the counters, and a pthread barrier
   sized to the job's thread count (job_true.pbarr) ensures all workers of
   a job finish together.  Each worker pins itself to
   ThreadCoreAffinity[ThreadId], registers as ready, then loops: wait for a
   broadcast, claim a job slot, run job_true.pfunc, hit the barrier, and
   re-register.  Runs until PetscThreadGo is cleared by the FuncFinish job.
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;    /* thread index assigned at creation */
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;
  //printf("Thread %d In True Pool Thread Function\n",ThreadId);
  /* pin this thread to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    /* last thread to register: tell 'main' the whole pool is up */
    ierr = pthread_cond_signal(&main_cond);
  }
  /* the while loop needs to have an exit:
     the 'main' thread terminates all the threads by broadcasting
     the FuncFinish job, which clears PetscThreadGo
     (job_true.mutex is held at the top of every iteration) */
  while(PetscThreadGo) {
    /* check the predicate to avoid waiting needlessly, and re-wait if
       another thread sneaks in and consumes the job slots first */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, holds the lock */
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* claim a unique data index for this job while the mutex is held */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* do your job; pdata==NULL means a data-less job (e.g. FuncFinish) */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure in the global flag; MainJob reports it */
    }
    /* the barrier is necessary BECAUSE of job_true.iNumReadyThreads:
      what happens if a thread finishes before they all start? BAD!
     what happens if a thread finishes before any else start? BAD! */
    pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
    /* reset job */
    if(PetscThreadGo) {
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
        /* signal the 'main' thread that the job is done! (only done once) */
        ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "PetscThreadInitialize_True"
1515 void* PetscThreadInitialize_True(PetscInt N) {
1516   PetscInt i;
1517   int status;
1518 
1519   if(PetscUseThreadPool) {
1520     pVal = (int*)malloc(N*sizeof(int));
1521     /* allocate memory in the heap for the thread structure */
1522     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1523     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1524     job_true.pdata = (void**)malloc(N*sizeof(void*));
1525     for(i=0; i<N; i++) {
1526       pVal[i] = i;
1527       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1528       /* error check to ensure proper thread creation */
1529       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1530       /* should check error */
1531     }
1532   }
1533   else {
1534   }
1535   return NULL;
1536 }
1537 
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "PetscThreadFinalize_True"
1541 PetscErrorCode PetscThreadFinalize_True() {
1542   int i,ierr;
1543   void* jstatus;
1544 
1545   PetscFunctionBegin;
1546 
1547   if(PetscUseThreadPool) {
1548     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1549     /* join the threads */
1550     for(i=0; i<PetscMaxThreads; i++) {
1551       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1552       /* should check error */
1553     }
1554     free(BarrPoint);
1555     free(PetscThreadPoint);
1556   }
1557   else {
1558   }
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MainWait_True"
1564 void MainWait_True() {
1565   int ierr;
1566   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1567     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1568   }
1569   ierr = pthread_mutex_unlock(&job_true.mutex);
1570 }
1571 
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MainJob_True"
1574 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1575   int ierr;
1576   PetscErrorCode ijoberr = 0;
1577   if(PetscUseThreadPool) {
1578     MainWait();
1579     job_true.pfunc = pFunc;
1580     job_true.pdata = data;
1581     job_true.pbarr = &BarrPoint[n];
1582     job_true.iNumJobThreads = n;
1583     job_true.startJob = PETSC_TRUE;
1584     ierr = pthread_cond_broadcast(&job_true.cond);
1585     if(pFunc!=FuncFinish) {
1586       MainWait(); /* why wait after? guarantees that job gets done */
1587     }
1588   }
1589   else {
1590     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1591     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1592     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1593     free(apThread);
1594   }
1595   if(ithreaderr) {
1596     ijoberr = ithreaderr;
1597   }
1598   return ijoberr;
1599 }
1600 /**** NO THREAD POOL FUNCTION ****/
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MainJob_Spawn"
1603 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1604   PetscErrorCode ijoberr = 0;
1605 
1606   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1607   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1608   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1609   free(apThread);
1610 
1611   return ijoberr;
1612 }
1613 /****  ****/
1614 #endif
1615 
1616 void* FuncFinish(void* arg) {
1617   PetscThreadGo = PETSC_FALSE;
1618   return(0);
1619 }
1620 
1621 #endif
1622