xref: /petsc/src/sys/objects/init.c (revision 8f54b378f5d6295426aed6820f17a2391dfde445)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #include <petscsys.h>        /*I  "petscsys.h"   I*/
10 #if defined(PETSC_HAVE_SCHED_H) && defined(PETSC_USE_PTHREAD)
11 #ifndef _GNU_SOURCE
12 #define _GNU_SOURCE
13 #endif
14 #include <sched.h>
15 #endif
16 #if defined(PETSC_USE_PTHREAD)
17 #include <pthread.h>
18 #endif
19 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
20 #include <sys/sysinfo.h>
21 #endif
22 #include <unistd.h>
23 #if defined(PETSC_HAVE_STDLIB_H)
24 #include <stdlib.h>
25 #endif
26 #if defined(PETSC_HAVE_MALLOC_H)
27 #include <malloc.h>
28 #endif
29 #if defined(PETSC_HAVE_VALGRIND)
30 #include <valgrind/valgrind.h>
31 #endif
32 
/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool    PetscBeganMPI         = PETSC_FALSE;  /* PETSC_TRUE if PetscInitialize() called MPI_Init() itself */
PetscBool    PetscInitializeCalled = PETSC_FALSE;  /* set once PetscInitialize() has completed */
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;  /* set once PetscFinalize() has completed */
PetscBool    PetscUseThreadPool    = PETSC_FALSE;  /* enabled by the -use_thread_pool option */
PetscBool    PetscThreadGo         = PETSC_TRUE;   /* cleared to tell pool threads to shut down */
PetscMPIInt  PetscGlobalRank = -1;                 /* rank in PETSC_COMM_WORLD; -1 until initialized */
PetscMPIInt  PetscGlobalSize = -1;                 /* size of PETSC_COMM_WORLD; -1 until initialized */
45 
#if defined(PETSC_USE_PTHREAD_CLASSES)
PetscMPIInt  PetscMaxThreads = 2;  /* number of worker threads; overridden by -thread_max */
pthread_t*   PetscThreadPoint;     /* handles of the spawned worker threads */
#define PETSC_HAVE_PTHREAD_BARRIER /* NOTE(review): defined unconditionally here, so every PETSC_HAVE_PTHREAD_BARRIER guard below is always true -- confirm this is not a debugging leftover */
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode ithreaderr = 0;  /* error code reported back by worker threads */
int*         pVal;              /* per-thread integer arguments handed to workers */

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
int* ThreadCoreAffinity;    /* core index each worker is pinned to (set from -thread<i> options) */

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */

/* shared job descriptor for the 'tree' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);        /* kernel each worker runs */
  void** pdata;                 /* per-worker argument for pfunc */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* shared job descriptor for the 'main' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* shared job descriptor for the 'chain' thread pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/* shared job descriptor for the 'true' (barrier-based) thread pool */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
/* raw buffers backing the pools' mutex/cond/flag arrays; presumably allocated
   by the pool initializers elsewhere (note CACHE_LINE_SIZE above) */
char* arrmutex; /* used by 'chain','main','tree' thread pools */
char* arrcond1; /* used by 'chain','main','tree' thread pools */
char* arrcond2; /* used by 'chain','main','tree' thread pools */
char* arrstart; /* used by 'chain','main','tree' thread pools */
char* arrready; /* used by 'chain','main','tree' thread pools */

/* Function Pointers */
/* dispatch table bound to one of the _Tree/_Main/_Chain/_True variants below,
   selected by -use_thread_pool in PetscOptionsCheckInitial_Private() */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/
/* fallback used when no pool is selected: spawns/joins threads per job */
PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);

void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif
152 
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
/* MPI implementation lacks the C complex datatypes; PETSc supplies its own
   (presumably committed during PetscInitialize() -- defined elsewhere) */
MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
MPI_Datatype   MPI_C_COMPLEX;
#endif
PetscScalar    PETSC_i;        /* the imaginary unit; value assigned during initialization */
#else
PetscScalar    PETSC_i = 0.0;  /* real builds: PETSC_i is simply zero */
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;  /* MPI datatype for __float128 scalars */
#endif
MPI_Datatype   MPIU_2SCALAR = 0;  /* pair-of-scalars datatype (e.g. for reductions) */
MPI_Datatype   MPIU_2INT = 0;     /* pair-of-ints datatype */

/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
/* scratch globals used by the PetscBT bit-array macros in petscbt.h */
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;
178 
/*
       Function that is called to display all error messages
*/
/* overridable hooks: all PETSc error/help output funnels through these */
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
#if defined(PETSC_HAVE_MATLAB_ENGINE)
/* route formatted output through the MATLAB engine when built with it */
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
  This is needed to turn on/off cusp synchronization */
PetscBool   synchronizeCUSP = PETSC_FALSE;

/* ------------------------------------------------------------------------------*/
/*
   Optional file where all PETSc output from various prints is saved
*/
FILE *petsc_history = PETSC_NULL;  /* NULL unless -history opened a log via PetscOpenHistoryFile() */
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "PetscOpenHistoryFile"
201 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
202 {
203   PetscErrorCode ierr;
204   PetscMPIInt    rank,size;
205   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
206   char           version[256];
207 
208   PetscFunctionBegin;
209   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
210   if (!rank) {
211     char        arch[10];
212     int         err;
213     PetscViewer viewer;
214 
215     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
216     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
217     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
218     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
219     if (filename) {
220       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
221     } else {
222       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
223       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
224       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
225     }
226 
227     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
228     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
229     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
230     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
231     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
232     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
233     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
234     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
235     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
236     err = fflush(*fd);
237     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "PetscCloseHistoryFile"
244 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
245 {
246   PetscErrorCode ierr;
247   PetscMPIInt    rank;
248   char           date[64];
249   int            err;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
253   if (!rank) {
254     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
255     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
256     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
257     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
258     err = fflush(*fd);
259     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
260     err = fclose(*fd);
261     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
262   }
263   PetscFunctionReturn(0);
264 }
265 
266 /* ------------------------------------------------------------------------------*/
267 
268 /*
269    This is ugly and probably belongs somewhere else, but I want to
270   be able to put a true MPI abort error handler with command line args.
271 
272     This is so MPI errors in the debugger will leave all the stack
273   frames. The default MP_Abort() cleans up and exits thus providing no useful information
274   in the debugger hence we call abort() instead of MPI_Abort().
275 */
276 
#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/*
   Petsc_MPI_AbortOnError - MPI error handler that prints the MPI error code
   and aborts. Calls abort() rather than MPI_Abort() so that, in a debugger,
   all stack frames are preserved (see the comment block above).
*/
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();  /* deliberately abort(), not MPI_Abort(): keep the stack for debugging */
}
285 
#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/*
   Petsc_MPI_DebuggerOnError - MPI error handler that reports the MPI error
   code and attempts to attach the debugger; if the attach fails it falls
   back to MPI_Abort() to terminate the job.
*/
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "PetscEnd"
302 /*@C
303    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
304      wishes a clean exit somewhere deep in the program.
305 
306    Collective on PETSC_COMM_WORLD
307 
308    Options Database Keys are the same as for PetscFinalize()
309 
310    Level: advanced
311 
312    Note:
313    See PetscInitialize() for more general runtime options.
314 
315 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
316 @*/
PetscErrorCode  PetscEnd(void)
{
  PetscFunctionBegin;
  PetscFinalize();
  exit(0);   /* never returns; the return below only satisfies the compiler */
  return 0;
}
324 
PetscBool    PetscOptionsPublish = PETSC_FALSE;  /* set by the -options_gui option */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
extern PetscBool  petscsetmallocvisited;
/* machine name for the emacsclient error handler (-on_error_emacs);
   NOTE(review): buffer is 256 bytes but is filled with length 128 below -- confirm intended */
static char       emacsmachinename[256];

/* optional hooks a higher-level package registers (via
   PetscSetHelpVersionFunctions) to print its own version/help text first */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
332 
333 #undef __FUNCT__
334 #define __FUNCT__ "PetscSetHelpVersionFunctions"
335 /*@C
336    PetscSetHelpVersionFunctions - Sets functions that print help and version information
337    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
338    This routine enables a "higher-level" package that uses PETSc to print its messages first.
339 
340    Input Parameter:
341 +  help - the help function (may be PETSC_NULL)
342 -  version - the version function (may be PETSC_NULL)
343 
344    Level: developer
345 
346    Concepts: package help message
347 
348 @*/
PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
{
  PetscFunctionBegin;
  /* stash the callbacks in file-scope pointers; they are invoked (if non-null)
     by PetscOptionsCheckInitial_Private() before PETSc's own help/version text */
  PetscExternalHelpFunction    = help;
  PetscExternalVersionFunction = version;
  PetscFunctionReturn(0);
}
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
359 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
360 {
361   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
362   MPI_Comm       comm = PETSC_COMM_WORLD;
363   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
364   PetscErrorCode ierr;
365   PetscReal      si;
366   int            i;
367   PetscMPIInt    rank;
368   char           version[256];
369 
370   PetscFunctionBegin;
371   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
372 
373   /*
374       Setup the memory management; support for tracing malloc() usage
375   */
376   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
377 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
378   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
379   if ((!flg2 || flg1) && !petscsetmallocvisited) {
380 #if defined(PETSC_HAVE_VALGRIND)
381     if (flg2 || !(RUNNING_ON_VALGRIND)) {
382       /* turn off default -malloc if valgrind is being used */
383 #endif
384       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
385 #if defined(PETSC_HAVE_VALGRIND)
386     }
387 #endif
388   }
389 #else
390   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
391   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
392   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
393 #endif
394   if (flg3) {
395     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
396   }
397   flg1 = PETSC_FALSE;
398   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
399   if (flg1) {
400     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
401     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
402   }
403 
404   flg1 = PETSC_FALSE;
405   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
406   if (!flg1) {
407     flg1 = PETSC_FALSE;
408     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
409   }
410   if (flg1) {
411     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
412   }
413 
414   /*
415       Set the display variable for graphics
416   */
417   ierr = PetscSetDisplay();CHKERRQ(ierr);
418 
419 
420   /*
421       Print the PETSc version information
422   */
423   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
424   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
425   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
426   if (flg1 || flg2 || flg3){
427 
428     /*
429        Print "higher-level" package version message
430     */
431     if (PetscExternalVersionFunction) {
432       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
433     }
434 
435     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
436     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
437 ------------------------------\n");CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
441     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
442     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
443     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
444     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
445 ------------------------------\n");CHKERRQ(ierr);
446   }
447 
448   /*
449        Print "higher-level" package help message
450   */
451   if (flg3){
452     if (PetscExternalHelpFunction) {
453       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
454     }
455   }
456 
457   /*
458       Setup the error handling
459   */
460   flg1 = PETSC_FALSE;
461   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
462   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
463   flg1 = PETSC_FALSE;
464   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
465   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
466   flg1 = PETSC_FALSE;
467   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
468   if (flg1) {
469     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
470   }
471   flg1 = PETSC_FALSE;
472   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
473   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
474   flg1 = PETSC_FALSE;
475   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
476   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
477 
478   /*
479       Setup debugger information
480   */
481   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
482   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
483   if (flg1) {
484     MPI_Errhandler err_handler;
485 
486     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
487     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
488     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
489     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
490   }
491   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
492   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
493   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
494   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
495   if (flg1 || flg2) {
496     PetscMPIInt    size;
497     PetscInt       lsize,*nodes;
498     MPI_Errhandler err_handler;
499     /*
500        we have to make sure that all processors have opened
501        connections to all other processors, otherwise once the
502        debugger has stated it is likely to receive a SIGUSR1
503        and kill the program.
504     */
505     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
506     if (size > 2) {
507       PetscMPIInt dummy = 0;
508       MPI_Status  status;
509       for (i=0; i<size; i++) {
510         if (rank != i) {
511           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
512         }
513       }
514       for (i=0; i<size; i++) {
515         if (rank != i) {
516           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
517         }
518       }
519     }
520     /* check if this processor node should be in debugger */
521     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
522     lsize = size;
523     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
524     if (flag) {
525       for (i=0; i<lsize; i++) {
526         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
527       }
528     }
529     if (!flag) {
530       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
531       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
532       if (flg1) {
533         ierr = PetscAttachDebugger();CHKERRQ(ierr);
534       } else {
535         ierr = PetscStopForDebugger();CHKERRQ(ierr);
536       }
537       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
538       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
539     }
540     ierr = PetscFree(nodes);CHKERRQ(ierr);
541   }
542 
543   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
544   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
545 
546 #if defined(PETSC_USE_SOCKET_VIEWER)
547   /*
548     Activates new sockets for zope if needed
549   */
550   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
551   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
552   if (flgz){
553     int  sockfd;
554     char hostname[256];
555     char username[256];
556     int  remoteport = 9999;
557 
558     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
559     if (!hostname[0]){
560       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
561     }
562     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
563     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
564     PETSC_ZOPEFD = fdopen(sockfd, "w");
565     if (flgzout){
566       PETSC_STDOUT = PETSC_ZOPEFD;
567       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
568       fprintf(PETSC_STDOUT, "<<<start>>>");
569     } else {
570       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
571       fprintf(PETSC_ZOPEFD, "<<<start>>>");
572     }
573   }
574 #endif
575 #if defined(PETSC_USE_SERVER)
576   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
577   if (flgz){
578     PetscInt port = PETSC_DECIDE;
579     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
580     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
581   }
582 #endif
583 
584   /*
585         Setup profiling and logging
586   */
587 #if defined (PETSC_USE_INFO)
588   {
589     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
590     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
591     if (flg1 && logname[0]) {
592       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
593     } else if (flg1) {
594       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
595     }
596   }
597 #endif
598 #if defined(PETSC_USE_LOG)
599   mname[0] = 0;
600   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
601   if (flg1) {
602     if (mname[0]) {
603       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
604     } else {
605       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
606     }
607   }
608 #if defined(PETSC_HAVE_MPE)
609   flg1 = PETSC_FALSE;
610   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
611   if (flg1) PetscLogMPEBegin();
612 #endif
613   flg1 = PETSC_FALSE;
614   flg2 = PETSC_FALSE;
615   flg3 = PETSC_FALSE;
616   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
617   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
618   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
619   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
620   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
621   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
622 
623   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
624   if (flg1) {
625     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
626     FILE *file;
627     if (mname[0]) {
628       sprintf(name,"%s.%d",mname,rank);
629       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
630       file = fopen(fname,"w");
631       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
632     } else {
633       file = PETSC_STDOUT;
634     }
635     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
636   }
637 #endif
638 
639   /*
640       Setup building of stack frames for all function calls
641   */
642 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
643   ierr = PetscStackCreate();CHKERRQ(ierr);
644 #endif
645 
646   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
647 
648 #if defined(PETSC_USE_PTHREAD_CLASSES)
649   /*
650       Determine whether user specified maximum number of threads
651    */
652   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
653 
654   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
655   if(flg1) {
656     cpu_set_t mset;
657     int icorr,ncorr = get_nprocs();
658     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
659     CPU_ZERO(&mset);
660     CPU_SET(icorr%ncorr,&mset);
661     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
662   }
663 
664   /*
665       Determine whether to use thread pool
666    */
667   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
668   if (flg1) {
669     PetscUseThreadPool = PETSC_TRUE;
670     PetscInt N_CORES = get_nprocs();
671     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
672     char tstr[9];
673     char tbuf[2];
674     strcpy(tstr,"-thread");
675     for(i=0;i<PetscMaxThreads;i++) {
676       ThreadCoreAffinity[i] = i;
677       sprintf(tbuf,"%d",i);
678       strcat(tstr,tbuf);
679       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
680       if(flg1) {
681         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
682         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
683       }
684       tstr[7] = '\0';
685     }
686     /* get the thread pool type */
687     PetscInt ipool = 0;
688     const char *choices[4] = {"true","tree","main","chain"};
689 
690     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
691     switch(ipool) {
692     case 1:
693       PetscThreadFunc       = &PetscThreadFunc_Tree;
694       PetscThreadInitialize = &PetscThreadInitialize_Tree;
695       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
696       MainWait              = &MainWait_Tree;
697       MainJob               = &MainJob_Tree;
698       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
699       break;
700     case 2:
701       PetscThreadFunc       = &PetscThreadFunc_Main;
702       PetscThreadInitialize = &PetscThreadInitialize_Main;
703       PetscThreadFinalize   = &PetscThreadFinalize_Main;
704       MainWait              = &MainWait_Main;
705       MainJob               = &MainJob_Main;
706       PetscInfo(PETSC_NULL,"Using main thread pool\n");
707       break;
708 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
709     case 3:
710 #else
711     default:
712 #endif
713       PetscThreadFunc       = &PetscThreadFunc_Chain;
714       PetscThreadInitialize = &PetscThreadInitialize_Chain;
715       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
716       MainWait              = &MainWait_Chain;
717       MainJob               = &MainJob_Chain;
718       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
719       break;
720 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
721     default:
722       PetscThreadFunc       = &PetscThreadFunc_True;
723       PetscThreadInitialize = &PetscThreadInitialize_True;
724       PetscThreadFinalize   = &PetscThreadFinalize_True;
725       MainWait              = &MainWait_True;
726       MainJob               = &MainJob_True;
727       PetscInfo(PETSC_NULL,"Using true thread pool\n");
728       break;
729 #endif
730     }
731     PetscThreadInitialize(PetscMaxThreads);
732   } else {
733     //need to define these in the case on 'no threads' or 'thread create/destroy'
734     //could take any of the above versions
735     MainJob               = &MainJob_Spawn;
736   }
737 #endif
738   /*
739        Print basic help message
740   */
741   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
742   if (flg1) {
743     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
773     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
774     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
777 #if defined(PETSC_USE_LOG)
778     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
779     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
780     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
781 #if defined(PETSC_HAVE_MPE)
782     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
783 #endif
784     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
785 #endif
786     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
787     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
788     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
789     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
790   }
791 
792   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
793   if (flg1) {
794     ierr = PetscSleep(si);CHKERRQ(ierr);
795   }
796 
797   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
798   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
799   if (f) {
800     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
801   }
802 
803 #if defined(PETSC_HAVE_CUSP)
804   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
805   if (flg3) flg1 = PETSC_TRUE;
806   else flg1 = PETSC_FALSE;
807   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
808   if (flg1) synchronizeCUSP = PETSC_TRUE;
809 #endif
810 
811   PetscFunctionReturn(0);
812 }
813 
814 #if defined(PETSC_USE_PTHREAD_CLASSES)
815 
816 /**** 'Tree' Thread Pool Functions ****/
/* Worker routine for the 'tree' thread pool.

   The pool is organized as a binary tree (fan-out Mary = 2): thread t
   supervises threads 2t+1 and 2t+2 when they exist.  A thread with no
   children ("PeeOn", a leaf) only waits for work; an interior thread
   additionally waits for its subordinates to raise their arrThreadReady
   flags before raising its own, so thread 0 being ready implies the whole
   pool is ready.  Thread 0 reports pool state to 'main' via main_cond.
   Jobs arrive through job_tree.pfunc/pdata; the loop exits when
   PetscThreadGo becomes PETSC_FALSE (set by the FuncFinish job).

   arg points at this thread's index in pVal[]; always returns NULL. */
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;  /* Mary = fan-out of the tree */
  PetscBool PeeOn;
  cpu_set_t mset;

  /* pin this worker to the core recorded in ThreadCoreAffinity[] */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* a thread whose first child index exceeds the pool size is a leaf */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks
           upon return, has the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit
  the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
        *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
        SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job: NULL pdata means every thread passes NULL to pfunc,
       otherwise each thread gets its own pdata[ThreadId] slot */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure in the global flag; MainJob reports it */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinates, waiting for them to be ready AND to have
           actually started (arrThreadStarted), not merely still-ready from
           the previous job */
        for(i=1;i<=Mary;i++) {
          SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks
               upon return, has the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
        }
        /* your subordinates are now ready */
      }
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "PetscThreadInitialize_Tree"
939 void* PetscThreadInitialize_Tree(PetscInt N) {
940   PetscInt i,ierr;
941   int status;
942 
943   if(PetscUseThreadPool) {
944     size_t Val1 = (size_t)CACHE_LINE_SIZE;
945     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
946     arrmutex = (char*)memalign(Val1,Val2);
947     arrcond1 = (char*)memalign(Val1,Val2);
948     arrcond2 = (char*)memalign(Val1,Val2);
949     arrstart = (char*)memalign(Val1,Val2);
950     arrready = (char*)memalign(Val1,Val2);
951     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
952     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
953     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
954     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
955     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
956     /* initialize job structure */
957     for(i=0; i<PetscMaxThreads; i++) {
958       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
959       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
960       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
961       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
962       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
963     }
964     for(i=0; i<PetscMaxThreads; i++) {
965       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
966       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
967       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
968       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
969       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
970     }
971     job_tree.pfunc = NULL;
972     job_tree.pdata = (void**)malloc(N*sizeof(void*));
973     job_tree.startJob = PETSC_FALSE;
974     job_tree.eJobStat = JobInitiated;
975     pVal = (int*)malloc(N*sizeof(int));
976     /* allocate memory in the heap for the thread structure */
977     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
978     /* create threads */
979     for(i=0; i<N; i++) {
980       pVal[i] = i;
981       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
982       /* should check status */
983     }
984   }
985   return NULL;
986 }
987 
988 #undef __FUNCT__
989 #define __FUNCT__ "PetscThreadFinalize_Tree"
990 PetscErrorCode PetscThreadFinalize_Tree() {
991   int i,ierr;
992   void* jstatus;
993 
994   PetscFunctionBegin;
995 
996   if(PetscUseThreadPool) {
997     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
998     /* join the threads */
999     for(i=0; i<PetscMaxThreads; i++) {
1000       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1001       /* do error checking*/
1002     }
1003     free(PetscThreadPoint);
1004     free(arrmutex);
1005     free(arrcond1);
1006     free(arrcond2);
1007     free(arrstart);
1008     free(arrready);
1009     free(job_tree.pdata);
1010     free(pVal);
1011   }
1012   else {
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MainWait_Tree"
1019 void MainWait_Tree() {
1020   int ierr;
1021   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1022   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1023     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1024   }
1025   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MainJob_Tree"
1030 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1031   int i,ierr;
1032   PetscErrorCode ijoberr = 0;
1033   if(PetscUseThreadPool) {
1034     MainWait();
1035     job_tree.pfunc = pFunc;
1036     job_tree.pdata = data;
1037     job_tree.startJob = PETSC_TRUE;
1038     for(i=0; i<PetscMaxThreads; i++) {
1039       *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1040     }
1041     job_tree.eJobStat = JobInitiated;
1042     ierr = pthread_cond_signal(job_tree.cond2array[0]);
1043     if(pFunc!=FuncFinish) {
1044       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1045     }
1046   }
1047   else {
1048     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1049     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1050     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1051     free(apThread);
1052   }
1053   if(ithreaderr) {
1054     ijoberr = ithreaderr;
1055   }
1056   return ijoberr;
1057 }
1058 /****  ****/
1059 
1060 /**** 'Main' Thread Pool Functions ****/
/* Worker routine for the 'main' thread pool.

   Every worker synchronizes directly with the 'main' thread: it raises its
   arrThreadReady flag and signals cond1 when idle, then sleeps on cond2
   until MainJob_Main clears the flag and hands out job_main.pfunc/pdata.
   The loop exits when PetscThreadGo becomes PETSC_FALSE (set by the
   FuncFinish job).

   arg points at this thread's index in pVal[]; always returns NULL. */
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;

  /* pin this worker to the core recorded in ThreadCoreAffinity[] */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* the while loop needs to have an exit
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* do the job: NULL pdata means every thread passes NULL to pfunc,
       otherwise each thread gets its own pdata[ThreadId] slot */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure in the global flag; MainJob reports it */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "PetscThreadInitialize_Main"
1115 void* PetscThreadInitialize_Main(PetscInt N) {
1116   PetscInt i,ierr;
1117   int status;
1118 
1119   if(PetscUseThreadPool) {
1120     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1121     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1122     arrmutex = (char*)memalign(Val1,Val2);
1123     arrcond1 = (char*)memalign(Val1,Val2);
1124     arrcond2 = (char*)memalign(Val1,Val2);
1125     arrstart = (char*)memalign(Val1,Val2);
1126     arrready = (char*)memalign(Val1,Val2);
1127     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1128     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1129     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1130     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1131     /* initialize job structure */
1132     for(i=0; i<PetscMaxThreads; i++) {
1133       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1134       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1135       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1136       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1137     }
1138     for(i=0; i<PetscMaxThreads; i++) {
1139       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1140       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1141       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1142       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1143     }
1144     job_main.pfunc = NULL;
1145     job_main.pdata = (void**)malloc(N*sizeof(void*));
1146     pVal = (int*)malloc(N*sizeof(int));
1147     /* allocate memory in the heap for the thread structure */
1148     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1149     /* create threads */
1150     for(i=0; i<N; i++) {
1151       pVal[i] = i;
1152       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1153       /* error check */
1154     }
1155   }
1156   else {
1157   }
1158   return NULL;
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "PetscThreadFinalize_Main"
1163 PetscErrorCode PetscThreadFinalize_Main() {
1164   int i,ierr;
1165   void* jstatus;
1166 
1167   PetscFunctionBegin;
1168 
1169   if(PetscUseThreadPool) {
1170     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1171     /* join the threads */
1172     for(i=0; i<PetscMaxThreads; i++) {
1173       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1174     }
1175     free(PetscThreadPoint);
1176     free(arrmutex);
1177     free(arrcond1);
1178     free(arrcond2);
1179     free(arrstart);
1180     free(arrready);
1181     free(job_main.pdata);
1182     free(pVal);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MainWait_Main"
1189 void MainWait_Main() {
1190   int i,ierr;
1191   for(i=0; i<PetscMaxThreads; i++) {
1192     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1193     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1194       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1195     }
1196     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1197   }
1198 }
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MainJob_Main"
1202 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1203   int i,ierr;
1204   PetscErrorCode ijoberr = 0;
1205   if(PetscUseThreadPool) {
1206     MainWait(); /* you know everyone is waiting to be signalled! */
1207     job_main.pfunc = pFunc;
1208     job_main.pdata = data;
1209     for(i=0; i<PetscMaxThreads; i++) {
1210       *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1211     }
1212     /* tell the threads to go to work */
1213     for(i=0; i<PetscMaxThreads; i++) {
1214       ierr = pthread_cond_signal(job_main.cond2array[i]);
1215     }
1216     if(pFunc!=FuncFinish) {
1217       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1218     }
1219   }
1220   else {
1221     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1222     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1223     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1224     free(apThread);
1225   }
1226   if(ithreaderr) {
1227     ijoberr = ithreaderr;
1228   }
1229   return ijoberr;
1230 }
1231 /****  ****/
1232 
1233 /**** Chain Thread Functions ****/
/* Worker routine for the 'chain' thread pool.

   The pool is organized as a chain: thread t supervises thread t+1, and the
   last thread ("PeeOn") has no subordinate.  Each thread waits for its
   subordinate to raise its arrThreadReady flag before raising its own, so
   thread 0 being ready implies the whole chain is ready.  Thread 0 reports
   pool state to 'main' via main_cond.  Jobs arrive through
   job_chain.pfunc/pdata; the loop exits when PetscThreadGo becomes
   PETSC_FALSE (set by the FuncFinish job).

   arg points at this thread's index in pVal[]; always returns NULL. */
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;
  cpu_set_t mset;

  /* pin this worker to the core recorded in ThreadCoreAffinity[] */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* the last thread of the chain has no subordinate */
  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready*/
  }
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /*  the while loop needs to have an exit
     the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
        *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
        *(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job: NULL pdata means every thread passes NULL to pfunc,
       otherwise each thread gets its own pdata[ThreadId] slot */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure in the global flag; MainJob reports it */
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinate, waiting for him to be ready AND to have
           actually started (arrThreadStarted), not merely still-ready from
           the previous job */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks
           upon return, has the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "PetscThreadInitialize_Chain"
1342 void* PetscThreadInitialize_Chain(PetscInt N) {
1343   PetscInt i,ierr;
1344   int status;
1345 
1346   if(PetscUseThreadPool) {
1347     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1348     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1349     arrmutex = (char*)memalign(Val1,Val2);
1350     arrcond1 = (char*)memalign(Val1,Val2);
1351     arrcond2 = (char*)memalign(Val1,Val2);
1352     arrstart = (char*)memalign(Val1,Val2);
1353     arrready = (char*)memalign(Val1,Val2);
1354     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1355     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1356     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1357     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1358     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1359     /* initialize job structure */
1360     for(i=0; i<PetscMaxThreads; i++) {
1361       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1362       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1363       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1364       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1365       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1366     }
1367     for(i=0; i<PetscMaxThreads; i++) {
1368       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1369       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1370       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1371       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1372       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1373     }
1374     job_chain.pfunc = NULL;
1375     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1376     job_chain.startJob = PETSC_FALSE;
1377     job_chain.eJobStat = JobInitiated;
1378     pVal = (int*)malloc(N*sizeof(int));
1379     /* allocate memory in the heap for the thread structure */
1380     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1381     /* create threads */
1382     for(i=0; i<N; i++) {
1383       pVal[i] = i;
1384       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1385       /* should check error */
1386     }
1387   }
1388   else {
1389   }
1390   return NULL;
1391 }
1392 
1393 
1394 #undef __FUNCT__
1395 #define __FUNCT__ "PetscThreadFinalize_Chain"
1396 PetscErrorCode PetscThreadFinalize_Chain() {
1397   int i,ierr;
1398   void* jstatus;
1399 
1400   PetscFunctionBegin;
1401 
1402   if(PetscUseThreadPool) {
1403     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1404     /* join the threads */
1405     for(i=0; i<PetscMaxThreads; i++) {
1406       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1407       /* should check error */
1408     }
1409     free(PetscThreadPoint);
1410     free(arrmutex);
1411     free(arrcond1);
1412     free(arrcond2);
1413     free(arrstart);
1414     free(arrready);
1415     free(job_chain.pdata);
1416     free(pVal);
1417   }
1418   else {
1419   }
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MainWait_Chain"
1425 void MainWait_Chain() {
1426   int ierr;
1427   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1428   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1429     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1430   }
1431   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MainJob_Chain"
1436 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1437   int i,ierr;
1438   PetscErrorCode ijoberr = 0;
1439   if(PetscUseThreadPool) {
1440     MainWait();
1441     job_chain.pfunc = pFunc;
1442     job_chain.pdata = data;
1443     job_chain.startJob = PETSC_TRUE;
1444     for(i=0; i<PetscMaxThreads; i++) {
1445       *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1446     }
1447     job_chain.eJobStat = JobInitiated;
1448     ierr = pthread_cond_signal(job_chain.cond2array[0]);
1449     if(pFunc!=FuncFinish) {
1450       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1451     }
1452   }
1453   else {
1454     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1455     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1456     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1457     free(apThread);
1458   }
1459   if(ithreaderr) {
1460     ijoberr = ithreaderr;
1461   }
1462   return ijoberr;
1463 }
1464 /****  ****/
1465 
1466 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1467 /**** True Thread Functions ****/
/* Worker routine for the 'true' thread pool.

   All workers share one mutex/condition pair (job_true.mutex/cond) and two
   counters: iNumReadyThreads (workers signed in and idle) and
   iNumJobThreads (job slots remaining to be claimed).  Each worker claims a
   slot, computes its data index iVal from the sign-in order, runs the job,
   and meets the other participants at the shared barrier before signing
   back in.  The last worker to sign in signals 'main' via main_cond.
   The loop exits when PetscThreadGo becomes PETSC_FALSE (FuncFinish job).

   arg points at this thread's index in pVal[]; always returns NULL. */
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;

  /* pin this worker to the core recorded in ThreadCoreAffinity[] */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    /* last worker to sign in tells 'main' the pool is up */
    ierr = pthread_cond_signal(&main_cond);
  }
  /* the while loop needs to have an exit
    the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to wait if another thread sneaks in and messes with the predicate */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    /* claim a job slot; iVal numbers the claimants 0..PetscMaxThreads-1
       in sign-out order */
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* NOTE(review): job_true.pdata/pfunc are read here after the mutex has
       been released -- presumably safe because the poster does not reuse the
       job structure until all workers pass the barrier; confirm against
       MainJob_True */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1;  /* record failure in the global flag */
    }
    /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
      what happens if a thread finishes before they all start? BAD!
     what happens if a thread finishes before any else start? BAD! */
    pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
    /* reset job */
    if(PetscThreadGo) {
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
        /* signal the 'main' thread that the job is done! (only done once) */
        ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "PetscThreadInitialize_True"
1530 void* PetscThreadInitialize_True(PetscInt N) {
1531   PetscInt i;
1532   int status;
1533 
1534   if(PetscUseThreadPool) {
1535     pVal = (int*)malloc(N*sizeof(int));
1536     /* allocate memory in the heap for the thread structure */
1537     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1538     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1539     job_true.pdata = (void**)malloc(N*sizeof(void*));
1540     for(i=0; i<N; i++) {
1541       pVal[i] = i;
1542       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1543       /* error check to ensure proper thread creation */
1544       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1545       /* should check error */
1546     }
1547   }
1548   else {
1549   }
1550   return NULL;
1551 }
1552 
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "PetscThreadFinalize_True"
1556 PetscErrorCode PetscThreadFinalize_True() {
1557   int i,ierr;
1558   void* jstatus;
1559 
1560   PetscFunctionBegin;
1561 
1562   if(PetscUseThreadPool) {
1563     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1564     /* join the threads */
1565     for(i=0; i<PetscMaxThreads; i++) {
1566       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1567       /* should check error */
1568     }
1569     free(BarrPoint);
1570     free(PetscThreadPoint);
1571   }
1572   else {
1573   }
1574   PetscFunctionReturn(0);
1575 }
1576 
#undef __FUNCT__
#define __FUNCT__ "MainWait_True"
/*
   Blocks the 'main' thread on main_cond until every pool thread is idle
   (iNumReadyThreads has reached PetscMaxThreads) and no job is pending
   (startJob is false).  The loop re-tests the predicate after each wakeup,
   which guards against spurious wakeups from pthread_cond_wait.

   NOTE(review): pthread_cond_wait requires job_true.mutex to be held on
   entry, and this function unlocks it on exit without ever locking it --
   so the protocol presumably requires the caller (or a worker's signal
   path) to have acquired job_true.mutex beforehand.  Confirm against the
   callers before restructuring this locking.
*/
void MainWait_True() {
  int ierr;
  while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
    ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
  }
  /* leaves job_true.mutex unlocked for the caller */
  ierr = pthread_mutex_unlock(&job_true.mutex);
}
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MainJob_True"
1589 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1590   int ierr;
1591   PetscErrorCode ijoberr = 0;
1592   if(PetscUseThreadPool) {
1593     MainWait();
1594     job_true.pfunc = pFunc;
1595     job_true.pdata = data;
1596     job_true.pbarr = &BarrPoint[n];
1597     job_true.iNumJobThreads = n;
1598     job_true.startJob = PETSC_TRUE;
1599     ierr = pthread_cond_broadcast(&job_true.cond);
1600     if(pFunc!=FuncFinish) {
1601       MainWait(); /* why wait after? guarantees that job gets done */
1602     }
1603   }
1604   else {
1605     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1606     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1607     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1608     free(apThread);
1609   }
1610   if(ithreaderr) {
1611     ijoberr = ithreaderr;
1612   }
1613   return ijoberr;
1614 }
1615 /**** NO THREAD POOL FUNCTION ****/
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MainJob_Spawn"
1618 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1619   PetscErrorCode ijoberr = 0;
1620 
1621   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1622   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1623   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1624   free(apThread);
1625 
1626   return ijoberr;
1627 }
1628 /****  ****/
1629 #endif
1630 
1631 void* FuncFinish(void* arg) {
1632   PetscThreadGo = PETSC_FALSE;
1633   return(0);
1634 }
1635 
1636 #endif
1637