xref: /petsc/src/sys/objects/init.c (revision af359df37090132bfea54ad0f69df140649f0fd8) !
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #define _GNU_SOURCE
10 #include <sched.h>
11 #include <petscsys.h>        /*I  "petscsys.h"   I*/
12 #if defined(PETSC_USE_PTHREAD)
13 #include <pthread.h>
14 #endif
15 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
16 #include <sys/sysinfo.h>
17 #endif
18 #include <unistd.h>
19 #if defined(PETSC_HAVE_STDLIB_H)
20 #include <stdlib.h>
21 #endif
22 #if defined(PETSC_HAVE_MALLOC_H)
23 #include <malloc.h>
24 #endif
25 #if defined(PETSC_HAVE_VALGRIND)
26 #include <valgrind/valgrind.h>
27 #endif
28 
/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool    PetscBeganMPI         = PETSC_FALSE;
PetscBool    PetscInitializeCalled = PETSC_FALSE;  /* presumably set by PetscInitialize()/PetscFinalize(); confirm against init code */
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
PetscBool    PetscUseThreadPool    = PETSC_FALSE;  /* set when -use_thread_pool is given (see PetscOptionsCheckInitial_Private) */
PetscBool    PetscThreadGo         = PETSC_TRUE;
PetscMPIInt  PetscGlobalRank = -1;                 /* -1 until the MPI rank/size have been queried */
PetscMPIInt  PetscGlobalSize = -1;
41 
#if defined(PETSC_USE_PTHREAD_CLASSES)
PetscMPIInt  PetscMaxThreads = 2;  /* default thread count; overridden by -thread_max */
pthread_t*   PetscThreadPoint;     /* ids of the pool's worker threads */
/* NOTE(review): defined unconditionally here, which forces the guard just below
   (and every other PETSC_HAVE_PTHREAD_BARRIER test in this file) to be true;
   looks like a development override rather than a configure result -- confirm */
#define PETSC_HAVE_PTHREAD_BARRIER
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode ithreaderr = 0;
int*         pVal;

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
int* ThreadCoreAffinity;    /* core index per thread; filled from the -thread<i> options */

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */

/* job descriptor for the 'tree' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);        /* work function run by the threads */
  void** pdata;                 /* per-thread argument handed to pfunc */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* job descriptor for the 'main' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* job descriptor for the 'chain' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/* job descriptor for the 'true' (barrier-based) thread pool */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
char* arrmutex; /* used by 'chain','main','tree' thread pools */
char* arrcond1; /* used by 'chain','main','tree' thread pools */
char* arrcond2; /* used by 'chain','main','tree' thread pools */
char* arrstart; /* used by 'chain','main','tree' thread pools */
char* arrready; /* used by 'chain','main','tree' thread pools */

/* Function Pointers: bound to one of the Tree/Main/Chain/True variants below
   depending on the -use_thread_pool choice (see PetscOptionsCheckInitial_Private) */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/

void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif
147 
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
/* MPI implementation lacks the C99 complex datatypes; PETSc supplies its own
   handles (presumably committed during startup -- confirm against init code) */
MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
MPI_Datatype   MPI_C_COMPLEX;
#endif
PetscScalar    PETSC_i;        /* the imaginary unit; assigned elsewhere -- TODO confirm */
#else
PetscScalar    PETSC_i = 0.0;  /* real builds: PETSC_i is identically zero */
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;
#endif
MPI_Datatype   MPIU_2SCALAR = 0;  /* 0 (MPI_DATATYPE_NULL) until created at startup -- presumably; verify */
MPI_Datatype   MPIU_2INT = 0;

/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;
173 
/*
       Function that is called to display all error messages
*/
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
#if defined(PETSC_HAVE_MATLAB_ENGINE)
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
  This is needed to turn on/off cusp synchronization */
PetscBool   synchronizeCUSP = PETSC_FALSE;

/* ------------------------------------------------------------------------------*/
/*
   Optional file where all PETSc output from various prints is saved
*/
FILE *petsc_history = PETSC_NULL;  /* opened via PetscOpenHistoryFile() when -history is given; closed by PetscCloseHistoryFile() */
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "PetscOpenHistoryFile"
196 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
197 {
198   PetscErrorCode ierr;
199   PetscMPIInt    rank,size;
200   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
201   char           version[256];
202 
203   PetscFunctionBegin;
204   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
205   if (!rank) {
206     char        arch[10];
207     int         err;
208     PetscViewer viewer;
209 
210     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
211     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
212     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
213     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
214     if (filename) {
215       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
216     } else {
217       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
218       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
219       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
220     }
221 
222     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
223     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
224     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
225     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
226     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
227     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
228     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
229     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
230     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
231     err = fflush(*fd);
232     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "PetscCloseHistoryFile"
239 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
240 {
241   PetscErrorCode ierr;
242   PetscMPIInt    rank;
243   char           date[64];
244   int            err;
245 
246   PetscFunctionBegin;
247   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
248   if (!rank) {
249     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
250     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
251     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
252     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
253     err = fflush(*fd);
254     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
255     err = fclose(*fd);
256     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 /* ------------------------------------------------------------------------------*/
262 
263 /*
264    This is ugly and probably belongs somewhere else, but I want to
265   be able to put a true MPI abort error handler with command line args.
266 
267     This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits thus providing no useful information
269   in the debugger hence we call abort() instead of MPI_Abort().
270 */
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "Petsc_MPI_AbortOnError"
274 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
275 {
276   PetscFunctionBegin;
277   (*PetscErrorPrintf)("MPI error %d\n",*flag);
278   abort();
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
283 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
284 {
285   PetscErrorCode ierr;
286 
287   PetscFunctionBegin;
288   (*PetscErrorPrintf)("MPI error %d\n",*flag);
289   ierr = PetscAttachDebugger();
290   if (ierr) { /* hopeless so get out */
291     MPI_Abort(*comm,*flag);
292   }
293 }
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "PetscEnd"
297 /*@C
298    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
299      wishes a clean exit somewhere deep in the program.
300 
301    Collective on PETSC_COMM_WORLD
302 
303    Options Database Keys are the same as for PetscFinalize()
304 
305    Level: advanced
306 
307    Note:
308    See PetscInitialize() for more general runtime options.
309 
310 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
311 @*/
312 PetscErrorCode  PetscEnd(void)
313 {
314   PetscFunctionBegin;
315   PetscFinalize();
316   exit(0);
317   return 0;
318 }
319 
PetscBool    PetscOptionsPublish = PETSC_FALSE;  /* set by -options_gui (see PetscOptionsCheckInitial_Private) */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
extern PetscBool  petscsetmallocvisited;
static char       emacsmachinename[256];         /* machine name captured from -on_error_emacs */

/* hooks for a higher-level package to print its own version/help text first;
   installed via PetscSetHelpVersionFunctions() */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "PetscSetHelpVersionFunctions"
330 /*@C
331    PetscSetHelpVersionFunctions - Sets functions that print help and version information
332    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
333    This routine enables a "higher-level" package that uses PETSc to print its messages first.
334 
335    Input Parameter:
336 +  help - the help function (may be PETSC_NULL)
337 -  version - the version function (may be PETSC_NULL)
338 
339    Level: developer
340 
341    Concepts: package help message
342 
343 @*/
344 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
345 {
346   PetscFunctionBegin;
347   PetscExternalHelpFunction    = help;
348   PetscExternalVersionFunction = version;
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
354 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
355 {
356   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
357   MPI_Comm       comm = PETSC_COMM_WORLD;
358   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
359   PetscErrorCode ierr;
360   PetscReal      si;
361   int            i;
362   PetscMPIInt    rank;
363   char           version[256];
364 
365   PetscFunctionBegin;
366   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
367 
368   /*
369       Setup the memory management; support for tracing malloc() usage
370   */
371   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
372 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
373   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
374   if ((!flg2 || flg1) && !petscsetmallocvisited) {
375 #if defined(PETSC_HAVE_VALGRIND)
376     if (flg2 || !(RUNNING_ON_VALGRIND)) {
377       /* turn off default -malloc if valgrind is being used */
378 #endif
379       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
380 #if defined(PETSC_HAVE_VALGRIND)
381     }
382 #endif
383   }
384 #else
385   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
386   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
387   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
388 #endif
389   if (flg3) {
390     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
391   }
392   flg1 = PETSC_FALSE;
393   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
394   if (flg1) {
395     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
396     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
397   }
398 
399   flg1 = PETSC_FALSE;
400   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
401   if (!flg1) {
402     flg1 = PETSC_FALSE;
403     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
404   }
405   if (flg1) {
406     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
407   }
408 
409   /*
410       Set the display variable for graphics
411   */
412   ierr = PetscSetDisplay();CHKERRQ(ierr);
413 
414 
415   /*
416       Print the PETSc version information
417   */
418   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
419   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
420   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
421   if (flg1 || flg2 || flg3){
422 
423     /*
424        Print "higher-level" package version message
425     */
426     if (PetscExternalVersionFunction) {
427       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
428     }
429 
430     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
431     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
432 ------------------------------\n");CHKERRQ(ierr);
433     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
434     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
435     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
436     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
437     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
440 ------------------------------\n");CHKERRQ(ierr);
441   }
442 
443   /*
444        Print "higher-level" package help message
445   */
446   if (flg3){
447     if (PetscExternalHelpFunction) {
448       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
449     }
450   }
451 
452   /*
453       Setup the error handling
454   */
455   flg1 = PETSC_FALSE;
456   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
457   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
458   flg1 = PETSC_FALSE;
459   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
460   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
461   flg1 = PETSC_FALSE;
462   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
463   if (flg1) {
464     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
465   }
466   flg1 = PETSC_FALSE;
467   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
468   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
469   flg1 = PETSC_FALSE;
470   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
471   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
472 
473   /*
474       Setup debugger information
475   */
476   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
477   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
478   if (flg1) {
479     MPI_Errhandler err_handler;
480 
481     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
482     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
483     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
484     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
485   }
486   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
487   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
488   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
489   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
490   if (flg1 || flg2) {
491     PetscMPIInt    size;
492     PetscInt       lsize,*nodes;
493     MPI_Errhandler err_handler;
494     /*
495        we have to make sure that all processors have opened
496        connections to all other processors, otherwise once the
497        debugger has stated it is likely to receive a SIGUSR1
498        and kill the program.
499     */
500     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
501     if (size > 2) {
502       PetscMPIInt dummy = 0;
503       MPI_Status  status;
504       for (i=0; i<size; i++) {
505         if (rank != i) {
506           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
507         }
508       }
509       for (i=0; i<size; i++) {
510         if (rank != i) {
511           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
512         }
513       }
514     }
515     /* check if this processor node should be in debugger */
516     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
517     lsize = size;
518     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
519     if (flag) {
520       for (i=0; i<lsize; i++) {
521         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
522       }
523     }
524     if (!flag) {
525       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
526       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
527       if (flg1) {
528         ierr = PetscAttachDebugger();CHKERRQ(ierr);
529       } else {
530         ierr = PetscStopForDebugger();CHKERRQ(ierr);
531       }
532       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
533       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
534     }
535     ierr = PetscFree(nodes);CHKERRQ(ierr);
536   }
537 
538   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
539   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
540 
541 #if defined(PETSC_USE_SOCKET_VIEWER)
542   /*
543     Activates new sockets for zope if needed
544   */
545   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
546   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
547   if (flgz){
548     int  sockfd;
549     char hostname[256];
550     char username[256];
551     int  remoteport = 9999;
552 
553     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
554     if (!hostname[0]){
555       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
556     }
557     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
558     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
559     PETSC_ZOPEFD = fdopen(sockfd, "w");
560     if (flgzout){
561       PETSC_STDOUT = PETSC_ZOPEFD;
562       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
563       fprintf(PETSC_STDOUT, "<<<start>>>");
564     } else {
565       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
566       fprintf(PETSC_ZOPEFD, "<<<start>>>");
567     }
568   }
569 #endif
570 #if defined(PETSC_USE_SERVER)
571   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
572   if (flgz){
573     PetscInt port = PETSC_DECIDE;
574     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
575     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
576   }
577 #endif
578 
579   /*
580         Setup profiling and logging
581   */
582 #if defined (PETSC_USE_INFO)
583   {
584     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
585     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
586     if (flg1 && logname[0]) {
587       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
588     } else if (flg1) {
589       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
590     }
591   }
592 #endif
593 #if defined(PETSC_USE_LOG)
594   mname[0] = 0;
595   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
596   if (flg1) {
597     if (mname[0]) {
598       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
599     } else {
600       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
601     }
602   }
603 #if defined(PETSC_HAVE_MPE)
604   flg1 = PETSC_FALSE;
605   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
606   if (flg1) PetscLogMPEBegin();
607 #endif
608   flg1 = PETSC_FALSE;
609   flg2 = PETSC_FALSE;
610   flg3 = PETSC_FALSE;
611   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
612   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
613   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
614   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
615   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
616   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
617 
618   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
619   if (flg1) {
620     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
621     FILE *file;
622     if (mname[0]) {
623       sprintf(name,"%s.%d",mname,rank);
624       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
625       file = fopen(fname,"w");
626       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
627     } else {
628       file = PETSC_STDOUT;
629     }
630     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
631   }
632 #endif
633 
634   /*
635       Setup building of stack frames for all function calls
636   */
637 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
638   ierr = PetscStackCreate();CHKERRQ(ierr);
639 #endif
640 
641   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
642 
643 #if defined(PETSC_USE_PTHREAD_CLASSES)
644   /*
645       Determine whether user specified maximum number of threads
646    */
647   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
648 
649   /*
650       Determine whether to use thread pool
651    */
652   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
653   if (flg1) {
654     PetscUseThreadPool = PETSC_TRUE;
655     PetscInt N_CORES = get_nprocs();
656     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
657     char tstr[9];
658     char tbuf[2];
659     strcpy(tstr,"-thread");
660     for(i=0;i<PetscMaxThreads;i++) {
661       ThreadCoreAffinity[i] = i;
662       sprintf(tbuf,"%d",i);
663       strcat(tstr,tbuf);
664       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
665       if(flg1) {
666         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
667         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
668       }
669       tstr[7] = '\0';
670     }
671     /* get the thread pool type */
672     PetscInt ipool = 0;
673     const char *choices[4] = {"true","tree","main","chain"};
674 
675     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
676     switch(ipool) {
677     case 1:
678       PetscThreadFunc       = &PetscThreadFunc_Tree;
679       PetscThreadInitialize = &PetscThreadInitialize_Tree;
680       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
681       MainWait              = &MainWait_Tree;
682       MainJob               = &MainJob_Tree;
683       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
684       break;
685     case 2:
686       PetscThreadFunc       = &PetscThreadFunc_Main;
687       PetscThreadInitialize = &PetscThreadInitialize_Main;
688       PetscThreadFinalize   = &PetscThreadFinalize_Main;
689       MainWait              = &MainWait_Main;
690       MainJob               = &MainJob_Main;
691       PetscInfo(PETSC_NULL,"Using main thread pool\n");
692       break;
693 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
694     case 3:
695 #else
696     default:
697 #endif
698       PetscThreadFunc       = &PetscThreadFunc_Chain;
699       PetscThreadInitialize = &PetscThreadInitialize_Chain;
700       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
701       MainWait              = &MainWait_Chain;
702       MainJob               = &MainJob_Chain;
703       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
704       break;
705 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
706     default:
707       PetscThreadFunc       = &PetscThreadFunc_True;
708       PetscThreadInitialize = &PetscThreadInitialize_True;
709       PetscThreadFinalize   = &PetscThreadFinalize_True;
710       MainWait              = &MainWait_True;
711       MainJob               = &MainJob_True;
712       PetscInfo(PETSC_NULL,"Using true thread pool\n");
713       break;
714 #endif
715     }
716     PetscThreadInitialize(PetscMaxThreads);
717   }
718 #endif
719   /*
720        Print basic help message
721   */
722   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
723   if (flg1) {
724     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
725     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
726     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
727     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
728     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
729     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
730     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
731     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
732     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
733     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
734     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
735     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
736     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
737     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
738     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
739     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
741     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
742     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
757 #if defined(PETSC_USE_LOG)
758     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
761 #if defined(PETSC_HAVE_MPE)
762     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
763 #endif
764     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
765 #endif
766     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
770   }
771 
772   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
773   if (flg1) {
774     ierr = PetscSleep(si);CHKERRQ(ierr);
775   }
776 
777   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
778   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
779   if (f) {
780     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
781   }
782 
783 #if defined(PETSC_HAVE_CUSP)
784   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
785   if (flg3) flg1 = PETSC_TRUE;
786   else flg1 = PETSC_FALSE;
787   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
788   if (flg1) synchronizeCUSP = PETSC_TRUE;
789 #endif
790 
791   PetscFunctionReturn(0);
792 }
793 
794 #if defined(PETSC_USE_PTHREAD_CLASSES)
795 
796 /**** 'Tree' Thread Pool Functions ****/
/*
   PetscThreadFunc_Tree - Entry point for each worker of the 'tree' thread pool.

   Workers form a binary tree (Mary = 2): thread i supervises children
   2*i+1 and 2*i+2.  Readiness propagates leaves->root via cond1array;
   work is fanned out root->leaves via cond2array.  Thread 0 (the root)
   is the only one that signals the 'main' thread via main_cond.

   Input: arg points at this thread's slot in pVal (its integer id).
   Returns NULL when the pool is shut down (PetscThreadGo goes false,
   presumably set by FuncFinish - not visible here).
*/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;      /* PETSC_TRUE when this thread is a leaf (has no subordinates) */
  cpu_set_t mset;

  /* pin this thread to the core chosen for it in ThreadCoreAffinity */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* a thread whose first child index is out of range is a leaf */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
           upon return, has the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  /* NOTE: this mutex is deliberately held across the top of the while loop
     below; it is released only after the ready flag has been consumed */
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit;
  the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /*need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, has the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
	SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job: pdata==NULL means a no-argument job (e.g. the finish job);
       otherwise each thread gets its own slot of the pdata array */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;   /* record failure for MainJob to report; value is not the original code */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinates, waiting for them to be ready;
         arrThreadStarted is also checked so a subordinate that never woke up
         is not mistaken for one that is ready again */
	for(i=1;i<=Mary;i++) {
	  SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks;
               upon return, has the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
	}
        /* your subordinates are now ready */
      }
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "PetscThreadInitialize_Tree"
919 void* PetscThreadInitialize_Tree(PetscInt N) {
920   PetscInt i,ierr;
921   int status;
922 
923   if(PetscUseThreadPool) {
924     size_t Val1 = (size_t)CACHE_LINE_SIZE;
925     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
926     arrmutex = (char*)memalign(Val1,Val2);
927     arrcond1 = (char*)memalign(Val1,Val2);
928     arrcond2 = (char*)memalign(Val1,Val2);
929     arrstart = (char*)memalign(Val1,Val2);
930     arrready = (char*)memalign(Val1,Val2);
931     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
932     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
933     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
934     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
935     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
936     /* initialize job structure */
937     for(i=0; i<PetscMaxThreads; i++) {
938       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
939       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
940       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
941       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
942       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
943     }
944     for(i=0; i<PetscMaxThreads; i++) {
945       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
946       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
947       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
948       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
949       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
950     }
951     job_tree.pfunc = NULL;
952     job_tree.pdata = (void**)malloc(N*sizeof(void*));
953     job_tree.startJob = PETSC_FALSE;
954     job_tree.eJobStat = JobInitiated;
955     pVal = (int*)malloc(N*sizeof(int));
956     /* allocate memory in the heap for the thread structure */
957     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
958     /* create threads */
959     for(i=0; i<N; i++) {
960       pVal[i] = i;
961       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
962       /* should check status */
963     }
964   }
965   return NULL;
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "PetscThreadFinalize_Tree"
970 PetscErrorCode PetscThreadFinalize_Tree() {
971   int i,ierr;
972   void* jstatus;
973 
974   PetscFunctionBegin;
975 
976   if(PetscUseThreadPool) {
977     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
978     /* join the threads */
979     for(i=0; i<PetscMaxThreads; i++) {
980       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
981       /* do error checking*/
982     }
983     free(PetscThreadPoint);
984     free(arrmutex);
985     free(arrcond1);
986     free(arrcond2);
987     free(arrstart);
988     free(arrready);
989     free(job_tree.pdata);
990     free(pVal);
991   }
992   else {
993   }
994   PetscFunctionReturn(0);
995 }
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MainWait_Tree"
999 void MainWait_Tree() {
1000   int ierr;
1001   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1002   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1003     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1004   }
1005   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MainJob_Tree"
/*
   MainJob_Tree - Dispatches one job (pFunc applied to data[i] per thread)
   to the 'tree' thread pool and waits for its completion.

   Input: pFunc - kernel every worker runs (FuncFinish terminates the pool)
          data  - per-thread argument array, or NULL for argument-less jobs
          n     - thread count used only on the non-pool fallback path
   Returns the sticky error flag ithreaderr (nonzero if any job ever failed).

   NOTE(review): assigning job_tree.pdata = data discards the array malloc'd
   in PetscThreadInitialize_Tree, which therefore leaks - confirm and fix
   with dedicated state if this code is revived.
*/
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();  /* ensure the previous job fully completed before touching job_tree */
    job_tree.pfunc = pFunc;
    job_tree.pdata = data;
    job_tree.startJob = PETSC_TRUE;
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_tree.eJobStat = JobInitiated;
    /* wake only the root; it fans the job out down the tree */
    ierr = pthread_cond_signal(job_tree.cond2array[0]);
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
    }
  }
  else {
    /* no pool: spawn n transient threads for this one job */
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1038 /****  ****/
1039 
1040 /**** 'Main' Thread Pool Functions ****/
/*
   PetscThreadFunc_Main - Entry point for each worker of the 'main' thread
   pool.  Unlike the tree/chain pools there is no worker hierarchy: every
   worker handshakes directly with the 'main' thread through its own
   mutex/cond pair (cond1array = worker->main "ready", cond2array =
   main->worker "go").

   Input: arg points at this thread's slot in pVal (its integer id).
   Returns NULL when PetscThreadGo goes false (presumably set by FuncFinish).
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;

  /* pin this thread to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* the while loop needs to have an exit
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
	/* the ready flag is cleared by MainJob_Main before it signals,
	   which is why the assignment above is intentionally commented out */
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* do the job: pdata==NULL means an argument-less job (e.g. the finish job) */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* sticky failure flag, reported by MainJob */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "PetscThreadInitialize_Main"
1095 void* PetscThreadInitialize_Main(PetscInt N) {
1096   PetscInt i,ierr;
1097   int status;
1098 
1099   if(PetscUseThreadPool) {
1100     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1101     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1102     arrmutex = (char*)memalign(Val1,Val2);
1103     arrcond1 = (char*)memalign(Val1,Val2);
1104     arrcond2 = (char*)memalign(Val1,Val2);
1105     arrstart = (char*)memalign(Val1,Val2);
1106     arrready = (char*)memalign(Val1,Val2);
1107     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1108     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1109     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1110     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1111     /* initialize job structure */
1112     for(i=0; i<PetscMaxThreads; i++) {
1113       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1114       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1115       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1116       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1117     }
1118     for(i=0; i<PetscMaxThreads; i++) {
1119       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1120       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1121       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1122       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1123     }
1124     job_main.pfunc = NULL;
1125     job_main.pdata = (void**)malloc(N*sizeof(void*));
1126     pVal = (int*)malloc(N*sizeof(int));
1127     /* allocate memory in the heap for the thread structure */
1128     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1129     /* create threads */
1130     for(i=0; i<N; i++) {
1131       pVal[i] = i;
1132       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1133       /* error check */
1134     }
1135   }
1136   else {
1137   }
1138   return NULL;
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "PetscThreadFinalize_Main"
1143 PetscErrorCode PetscThreadFinalize_Main() {
1144   int i,ierr;
1145   void* jstatus;
1146 
1147   PetscFunctionBegin;
1148 
1149   if(PetscUseThreadPool) {
1150     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1151     /* join the threads */
1152     for(i=0; i<PetscMaxThreads; i++) {
1153       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1154     }
1155     free(PetscThreadPoint);
1156     free(arrmutex);
1157     free(arrcond1);
1158     free(arrcond2);
1159     free(arrstart);
1160     free(arrready);
1161     free(job_main.pdata);
1162     free(pVal);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MainWait_Main"
1169 void MainWait_Main() {
1170   int i,ierr;
1171   for(i=0; i<PetscMaxThreads; i++) {
1172     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1173     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1174       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1175     }
1176     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1177   }
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MainJob_Main"
/*
   MainJob_Main - Dispatches one job (pFunc applied to data[i] per thread)
   to the 'main' thread pool and waits for its completion.

   Input: pFunc - kernel every worker runs (FuncFinish terminates the pool)
          data  - per-thread argument array, or NULL for argument-less jobs
          n     - thread count used only on the non-pool fallback path
   Returns the sticky error flag ithreaderr (nonzero if any job ever failed).

   NOTE(review): assigning job_main.pdata = data discards the array malloc'd
   in PetscThreadInitialize_Main, which therefore leaks - confirm and fix
   with dedicated state if this code is revived.
*/
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait(); /* you know everyone is waiting to be signalled! */
    job_main.pfunc = pFunc;
    job_main.pdata = data;
    /* clear the ready flags BEFORE signalling, so the workers' wait
       predicate (ready==TRUE) becomes false and they leave their wait loop */
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
    }
    /* tell the threads to go to work */
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_cond_signal(job_main.cond2array[i]);
    }
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
    }
  }
  else {
    /* no pool: spawn n transient threads for this one job */
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1211 /****  ****/
1212 
1213 /**** Chain Thread Functions ****/
/*
   PetscThreadFunc_Chain - Entry point for each worker of the 'chain' thread
   pool.  Workers form a linear chain: thread i supervises exactly one
   subordinate, thread i+1; the last thread (PetscMaxThreads-1) is the tail
   ("PeeOn") and has none.  Readiness propagates tail->head via cond1array;
   work is passed head->tail via cond2array.  Thread 0 alone signals the
   'main' thread via main_cond.

   Input: arg points at this thread's slot in pVal (its integer id).
   Returns NULL when PetscThreadGo goes false (presumably set by FuncFinish).
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;   /* PETSC_TRUE for the tail thread, which has no subordinate */
  cpu_set_t mset;

  /* pin this thread to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, has the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready*/
  }
  /* NOTE: this mutex is deliberately held into the while loop below; it is
     released only after the ready flag has been consumed */
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /*  the while loop needs to have an exit;
     the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /*upon entry, atomically releases the lock and blocks;
       upon return, has the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job: pdata==NULL means an argument-less job (e.g. the finish job) */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* sticky failure flag, reported by MainJob */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinate, wait for him to be ready;
         arrThreadStarted is also checked so a subordinate that never woke up
         is not mistaken for one that is ready again */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks;
           upon return, has the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "PetscThreadInitialize_Chain"
1322 void* PetscThreadInitialize_Chain(PetscInt N) {
1323   PetscInt i,ierr;
1324   int status;
1325 
1326   if(PetscUseThreadPool) {
1327     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1328     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1329     arrmutex = (char*)memalign(Val1,Val2);
1330     arrcond1 = (char*)memalign(Val1,Val2);
1331     arrcond2 = (char*)memalign(Val1,Val2);
1332     arrstart = (char*)memalign(Val1,Val2);
1333     arrready = (char*)memalign(Val1,Val2);
1334     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1335     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1336     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1337     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1338     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1339     /* initialize job structure */
1340     for(i=0; i<PetscMaxThreads; i++) {
1341       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1342       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1343       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1344       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1345       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1346     }
1347     for(i=0; i<PetscMaxThreads; i++) {
1348       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1349       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1350       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1351       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1352       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1353     }
1354     job_chain.pfunc = NULL;
1355     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1356     job_chain.startJob = PETSC_FALSE;
1357     job_chain.eJobStat = JobInitiated;
1358     pVal = (int*)malloc(N*sizeof(int));
1359     /* allocate memory in the heap for the thread structure */
1360     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1361     /* create threads */
1362     for(i=0; i<N; i++) {
1363       pVal[i] = i;
1364       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1365       /* should check error */
1366     }
1367   }
1368   else {
1369   }
1370   return NULL;
1371 }
1372 
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "PetscThreadFinalize_Chain"
1376 PetscErrorCode PetscThreadFinalize_Chain() {
1377   int i,ierr;
1378   void* jstatus;
1379 
1380   PetscFunctionBegin;
1381 
1382   if(PetscUseThreadPool) {
1383     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1384     /* join the threads */
1385     for(i=0; i<PetscMaxThreads; i++) {
1386       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1387       /* should check error */
1388     }
1389     free(PetscThreadPoint);
1390     free(arrmutex);
1391     free(arrcond1);
1392     free(arrcond2);
1393     free(arrstart);
1394     free(arrready);
1395     free(job_chain.pdata);
1396     free(pVal);
1397   }
1398   else {
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MainWait_Chain"
1405 void MainWait_Chain() {
1406   int ierr;
1407   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1408   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1409     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1410   }
1411   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MainJob_Chain"
/*
   MainJob_Chain - Dispatches one job (pFunc applied to data[i] per thread)
   to the 'chain' thread pool and waits for its completion.

   Input: pFunc - kernel every worker runs (FuncFinish terminates the pool)
          data  - per-thread argument array, or NULL for argument-less jobs
          n     - thread count used only on the non-pool fallback path
   Returns the sticky error flag ithreaderr (nonzero if any job ever failed).

   NOTE(review): assigning job_chain.pdata = data discards the array malloc'd
   in PetscThreadInitialize_Chain, which therefore leaks - confirm and fix
   with dedicated state if this code is revived.
*/
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();  /* ensure the previous job fully completed before touching job_chain */
    job_chain.pfunc = pFunc;
    job_chain.pdata = data;
    job_chain.startJob = PETSC_TRUE;
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_chain.eJobStat = JobInitiated;
    /* wake only the head of the chain; it passes the job down the line */
    ierr = pthread_cond_signal(job_chain.cond2array[0]);
    if(pFunc!=FuncFinish) {
      MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
    }
  }
  else {
    /* no pool: spawn n transient threads for this one job */
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1444 /****  ****/
1445 
1446 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1447 /**** True Thread Functions ****/
/*
   PetscThreadFunc_True - Entry point for each worker of the 'true' thread
   pool.  All workers share one mutex/cond pair (job_true.mutex/cond); the
   'main' thread broadcasts on job_true.cond to start a job, and a
   pthread_barrier (job_true.pbarr) synchronizes job completion.  The last
   worker to become ready signals the 'main' thread via main_cond.

   Input: arg points at this thread's slot in pVal (its integer id).
   Returns NULL when PetscThreadGo goes false (presumably set by FuncFinish).
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;

  /* pin this thread to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* NOTE: job_true.mutex is held across the top of the while loop below;
     it is released only after the job bookkeeping has been read */
  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    /* last thread to arrive tells 'main' the whole pool is ready */
    ierr = pthread_cond_signal(&main_cond);
  }
  /*the while loop needs to have an exit;
    the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /*need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to wait if another thread sneaks in and messes with the predicate */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks;
       upon return, has the lock */
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* claim a unique data slot for this job based on arrival order */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* do the job: pdata==NULL means an argument-less job (e.g. the finish job) */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* sticky failure flag, reported by MainJob */
    }
    /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
      what happens if a thread finishes before they all start? BAD!
     what happens if a thread finishes before any else start? BAD! */
    pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
    /* reset job */
    if(PetscThreadGo) {
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
	/* signal the 'main' thread that the job is done! (only done once) */
	ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "PetscThreadInitialize_True"
1510 void* PetscThreadInitialize_True(PetscInt N) {
1511   PetscInt i;
1512   int status;
1513 
1514   if(PetscUseThreadPool) {
1515     pVal = (int*)malloc(N*sizeof(int));
1516     /* allocate memory in the heap for the thread structure */
1517     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1518     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1519     job_true.pdata = (void**)malloc(N*sizeof(void*));
1520     for(i=0; i<N; i++) {
1521       pVal[i] = i;
1522       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1523       /* error check to ensure proper thread creation */
1524       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1525       /* should check error */
1526     }
1527   }
1528   else {
1529   }
1530   return NULL;
1531 }
1532 
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "PetscThreadFinalize_True"
1536 PetscErrorCode PetscThreadFinalize_True() {
1537   int i,ierr;
1538   void* jstatus;
1539 
1540   PetscFunctionBegin;
1541 
1542   if(PetscUseThreadPool) {
1543     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1544     /* join the threads */
1545     for(i=0; i<PetscMaxThreads; i++) {
1546       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1547       /* should check error */
1548     }
1549     free(BarrPoint);
1550     free(PetscThreadPoint);
1551   }
1552   else {
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MainWait_True"
1559 void MainWait_True() {
1560   int ierr;
1561   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1562     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1563   }
1564   ierr = pthread_mutex_unlock(&job_true.mutex);
1565 }
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MainJob_True"
1569 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1570   int ierr;
1571   PetscErrorCode ijoberr = 0;
1572   if(PetscUseThreadPool) {
1573     MainWait();
1574     job_true.pfunc = pFunc;
1575     job_true.pdata = data;
1576     job_true.pbarr = &BarrPoint[n];
1577     job_true.iNumJobThreads = n;
1578     job_true.startJob = PETSC_TRUE;
1579     ierr = pthread_cond_broadcast(&job_true.cond);
1580     if(pFunc!=FuncFinish) {
1581       MainWait(); /* why wait after? guarantees that job gets done */
1582     }
1583   }
1584   else {
1585     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1586     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1587     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1588     free(apThread);
1589   }
1590   if(ithreaderr) {
1591     ijoberr = ithreaderr;
1592   }
1593   return ijoberr;
1594 }
1595 /****  ****/
1596 #endif
1597 
1598 void* FuncFinish(void* arg) {
1599   PetscThreadGo = PETSC_FALSE;
1600   return(0);
1601 }
1602 
1603 #endif
1604