xref: /petsc/src/sys/objects/init.c (revision fc087633cc6752a849b99017f1e2f5f28a8baece)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #define _GNU_SOURCE
10 #include <sched.h>
11 #include <petscsys.h>        /*I  "petscsys.h"   I*/
12 #if defined(PETSC_USE_PTHREAD)
13 #include <pthread.h>
14 #endif
15 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
16 #include <sys/sysinfo.h>
17 #endif
18 #include <unistd.h>
19 #if defined(PETSC_HAVE_STDLIB_H)
20 #include <stdlib.h>
21 #endif
22 #if defined(PETSC_HAVE_MALLOC_H)
23 #include <malloc.h>
24 #endif
25 #if defined(PETSC_HAVE_VALGRIND)
26 #include <valgrind/valgrind.h>
27 #endif
28 
29 /* ------------------------Nasty global variables -------------------------------*/
30 /*
31      Indicates if PETSc started up MPI, or it was
32    already started before PETSc was initialized.
33 */
PetscBool    PetscBeganMPI         = PETSC_FALSE;  /* see comment above: whether PETSc itself started MPI */
PetscBool    PetscInitializeCalled = PETSC_FALSE;  /* presumably set once PetscInitialize() has run -- confirm at the setter */
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;  /* presumably set once PetscFinalize() has run -- confirm at the setter */
PetscBool    PetscUseThreadPool    = PETSC_FALSE;  /* set to PETSC_TRUE in PetscOptionsCheckInitial_Private() when -use_thread_pool is given */
PetscBool    PetscThreadGo         = PETSC_TRUE;   /* NOTE(review): looks like a run/stop flag for pool threads -- not set in this part of the file */
PetscMPIInt  PetscGlobalRank = -1;                 /* -1 until assigned elsewhere; presumably the rank in PETSC_COMM_WORLD */
PetscMPIInt  PetscGlobalSize = -1;                 /* -1 until assigned elsewhere; presumably the size of PETSC_COMM_WORLD */
41 
42 #if defined(PETSC_USE_PTHREAD_CLASSES)
43 PetscMPIInt  PetscMaxThreads = 2;
44 pthread_t*   PetscThreadPoint;
45 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
46 pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
47 #endif
48 PetscErrorCode ithreaderr = 0;
49 int*         pVal;
50 
51 #define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
52 int* ThreadCoreAffinity;
53 
54 typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */
55 
/*
   Job descriptor type whose single instance is job_tree.
   NOTE(review): by its name this belongs to the 'tree' thread pool (PetscThreadFunc_Tree etc.);
   the per-field notes below are inferred from names and types only -- confirm against the pool code.
*/
typedef struct {
  pthread_mutex_t** mutexarray;   /* array of pointers to mutexes; presumably one per thread */
  pthread_cond_t**  cond1array;   /* array of pointers to condition variables (first set) */
  pthread_cond_t** cond2array;    /* array of pointers to condition variables (second set) */
  void* (*pfunc)(void*);          /* function with the pthread start-routine signature; presumably the current job */
  void** pdata;                   /* array of void* arguments; presumably one per thread for pfunc */
  PetscBool startJob;
  estat eJobStat;                 /* one of JobInitiated, ThreadsWorking, JobCompleted */
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/*
   Job descriptor type whose single instance is job_main.
   NOTE(review): by its name this belongs to the 'main' thread pool (PetscThreadFunc_Main etc.);
   the per-field notes below are inferred from names and types only -- confirm against the pool code.
*/
typedef struct {
  pthread_mutex_t** mutexarray;   /* array of pointers to mutexes; presumably one per thread */
  pthread_cond_t**  cond1array;   /* array of pointers to condition variables (first set) */
  pthread_cond_t** cond2array;    /* array of pointers to condition variables (second set) */
  void* (*pfunc)(void*);          /* function with the pthread start-routine signature; presumably the current job */
  void** pdata;                   /* array of void* arguments; presumably one per thread for pfunc */
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/*
   Job descriptor type whose single instance is job_chain; it has the same fields as sjob_tree.
   NOTE(review): by its name this belongs to the 'chain' thread pool (PetscThreadFunc_Chain etc.);
   the per-field notes below are inferred from names and types only -- confirm against the pool code.
*/
typedef struct {
  pthread_mutex_t** mutexarray;   /* array of pointers to mutexes; presumably one per thread */
  pthread_cond_t**  cond1array;   /* array of pointers to condition variables (first set) */
  pthread_cond_t** cond2array;    /* array of pointers to condition variables (second set) */
  void* (*pfunc)(void*);          /* function with the pthread start-routine signature; presumably the current job */
  void** pdata;                   /* array of void* arguments; presumably one per thread for pfunc */
  PetscBool startJob;
  estat eJobStat;                 /* one of JobInitiated, ThreadsWorking, JobCompleted */
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
89 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
/*
   Job descriptor type whose single instance is job_true; only compiled when
   PETSC_HAVE_PTHREAD_BARRIER is defined.
   NOTE(review): by its name this belongs to the 'true' thread pool (PetscThreadFunc_True etc.);
   the per-field notes below are inferred from names and types only -- confirm against the pool code.
*/
typedef struct {
  pthread_mutex_t mutex;          /* statically initialized below with PTHREAD_MUTEX_INITIALIZER */
  pthread_cond_t cond;            /* statically initialized below with PTHREAD_COND_INITIALIZER */
  void* (*pfunc)(void*);          /* function with the pthread start-routine signature; presumably the current job */
  void** pdata;                   /* array of void* arguments; presumably one per thread for pfunc */
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
/* initial state: no job function, no data, no barrier, zero counters, startJob false */
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
101 #endif
102 
103 pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
104 char* arrmutex; /* used by 'chain','main','tree' thread pools */
105 char* arrcond1; /* used by 'chain','main','tree' thread pools */
106 char* arrcond2; /* used by 'chain','main','tree' thread pools */
107 char* arrstart; /* used by 'chain','main','tree' thread pools */
108 char* arrready; /* used by 'chain','main','tree' thread pools */
109 
110 /* Function Pointers */
/*
   Dispatch table for the active thread pool. All five are NULL until
   PetscOptionsCheckInitial_Private() sees -use_thread_pool, at which point they are pointed
   at the _Tree, _Main, _Chain or _True implementations according to the -pool option.
*/
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;   /* called with PetscMaxThreads right after selection */
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
116 /**** Tree Functions ****/
117 void*          PetscThreadFunc_Tree(void*);
118 void*          PetscThreadInitialize_Tree(PetscInt);
119 PetscErrorCode PetscThreadFinalize_Tree(void);
120 void           MainWait_Tree(void);
121 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
122 /**** Main Functions ****/
123 void*          PetscThreadFunc_Main(void*);
124 void*          PetscThreadInitialize_Main(PetscInt);
125 PetscErrorCode PetscThreadFinalize_Main(void);
126 void           MainWait_Main(void);
127 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
128 /**** Chain Functions ****/
129 void*          PetscThreadFunc_Chain(void*);
130 void*          PetscThreadInitialize_Chain(PetscInt);
131 PetscErrorCode PetscThreadFinalize_Chain(void);
132 void           MainWait_Chain(void);
133 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
134 /**** True Functions ****/
135 void*          PetscThreadFunc_True(void*);
136 void*          PetscThreadInitialize_True(PetscInt);
137 PetscErrorCode PetscThreadFinalize_True(void);
138 void           MainWait_True(void);
139 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
140 /****  ****/
141 
142 void* FuncFinish(void*);
143 void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
144 void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
145 #endif
146 
147 #if defined(PETSC_USE_COMPLEX)
148 #if defined(PETSC_COMPLEX_INSTANTIATE)
149 template <> class std::complex<double>; /* instantiate complex template class */
150 #endif
151 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
152 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
153 MPI_Datatype   MPI_C_COMPLEX;
154 #endif
155 PetscScalar    PETSC_i;
156 #else
157 PetscScalar    PETSC_i = 0.0;
158 #endif
159 #if defined(PETSC_USE_REAL___FLOAT128)
160 MPI_Datatype   MPIU___FLOAT128 = 0;
161 #endif
162 MPI_Datatype   MPIU_2SCALAR = 0;
163 MPI_Datatype   MPIU_2INT = 0;
164 
165 /*
166      These are needed by petscbt.h
167 */
168 #include <petscbt.h>
169 char      _BT_mask = ' ';
170 char      _BT_c = ' ';
171 PetscInt  _BT_idx  = 0;
172 
173 /*
174        Function that is called to display all error messages
175 */
176 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
177 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
178 #if defined(PETSC_HAVE_MATLAB_ENGINE)
179 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
180 #else
181 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
182 #endif
183 /*
184   This is needed to turn on/off cusp synchronization */
185 PetscBool   synchronizeCUSP = PETSC_FALSE;
186 
187 /* ------------------------------------------------------------------------------*/
188 /*
189    Optional file where all PETSc output from various prints is saved
190 */
191 FILE *petsc_history = PETSC_NULL;
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "PetscOpenHistoryFile"
195 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
196 {
197   PetscErrorCode ierr;
198   PetscMPIInt    rank,size;
199   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
200   char           version[256];
201 
202   PetscFunctionBegin;
203   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
204   if (!rank) {
205     char        arch[10];
206     int         err;
207     PetscViewer viewer;
208 
209     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
210     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
211     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
212     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
213     if (filename) {
214       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
215     } else {
216       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
217       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
218       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
219     }
220 
221     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
222     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
223     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
224     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
225     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
226     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
227     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
228     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
229     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
230     err = fflush(*fd);
231     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
232   }
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "PetscCloseHistoryFile"
238 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
239 {
240   PetscErrorCode ierr;
241   PetscMPIInt    rank;
242   char           date[64];
243   int            err;
244 
245   PetscFunctionBegin;
246   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
247   if (!rank) {
248     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
249     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
250     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
251     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
252     err = fflush(*fd);
253     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
254     err = fclose(*fd);
255     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 /* ------------------------------------------------------------------------------*/
261 
262 /*
263    This is ugly and probably belongs somewhere else, but I want to
264   be able to put a true MPI abort error handler with command line args.
265 
266     This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits thus providing no useful information
268   in the debugger hence we call abort() instead of MPI_Abort().
269 */
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "Petsc_MPI_AbortOnError"
273 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
274 {
275   PetscFunctionBegin;
276   (*PetscErrorPrintf)("MPI error %d\n",*flag);
277   abort();
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
282 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
283 {
284   PetscErrorCode ierr;
285 
286   PetscFunctionBegin;
287   (*PetscErrorPrintf)("MPI error %d\n",*flag);
288   ierr = PetscAttachDebugger();
289   if (ierr) { /* hopeless so get out */
290     MPI_Abort(*comm,*flag);
291   }
292 }
293 
294 #undef __FUNCT__
295 #define __FUNCT__ "PetscEnd"
296 /*@C
297    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
298      wishes a clean exit somewhere deep in the program.
299 
300    Collective on PETSC_COMM_WORLD
301 
302    Options Database Keys are the same as for PetscFinalize()
303 
304    Level: advanced
305 
306    Note:
307    See PetscInitialize() for more general runtime options.
308 
309 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
310 @*/
311 PetscErrorCode  PetscEnd(void)
312 {
313   PetscFunctionBegin;
314   PetscFinalize();
315   exit(0);
316   return 0;
317 }
318 
/* set from the -options_gui option in PetscOptionsCheckInitial_Private() */
PetscBool    PetscOptionsPublish = PETSC_FALSE;
/* defined elsewhere; called below to switch on the tracing malloc */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
/* defined elsewhere; when true the default -malloc handling in debug builds is skipped */
extern PetscBool  petscsetmallocvisited;
/* receives the value of -on_error_emacs and is handed to PetscEmacsClientErrorHandler as its context */
static char       emacsmachinename[256];

/*
   Optional callbacks installed by PetscSetHelpVersionFunctions(); when non-null they are
   invoked before PETSc prints its own version (-v, -version, -help) and help (-help) text.
*/
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "PetscSetHelpVersionFunctions"
329 /*@C
330    PetscSetHelpVersionFunctions - Sets functions that print help and version information
331    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
332    This routine enables a "higher-level" package that uses PETSc to print its messages first.
333 
334    Input Parameter:
335 +  help - the help function (may be PETSC_NULL)
336 -  version - the version function (may be PETSC_NULL)
337 
338    Level: developer
339 
340    Concepts: package help message
341 
342 @*/
343 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
344 {
345   PetscFunctionBegin;
346   PetscExternalHelpFunction    = help;
347   PetscExternalVersionFunction = version;
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
353 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
354 {
355   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
356   MPI_Comm       comm = PETSC_COMM_WORLD;
357   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
358   PetscErrorCode ierr;
359   PetscReal      si;
360   int            i;
361   PetscMPIInt    rank;
362   char           version[256];
363 
364   PetscFunctionBegin;
365   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
366 
367   /*
368       Setup the memory management; support for tracing malloc() usage
369   */
370   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
371 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
372   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
373   if ((!flg2 || flg1) && !petscsetmallocvisited) {
374 #if defined(PETSC_HAVE_VALGRIND)
375     if (flg2 || !(RUNNING_ON_VALGRIND)) {
376       /* turn off default -malloc if valgrind is being used */
377 #endif
378       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
379 #if defined(PETSC_HAVE_VALGRIND)
380     }
381 #endif
382   }
383 #else
384   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
385   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
386   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
387 #endif
388   if (flg3) {
389     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
390   }
391   flg1 = PETSC_FALSE;
392   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
393   if (flg1) {
394     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
395     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
396   }
397 
398   flg1 = PETSC_FALSE;
399   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
400   if (!flg1) {
401     flg1 = PETSC_FALSE;
402     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
403   }
404   if (flg1) {
405     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
406   }
407 
408   /*
409       Set the display variable for graphics
410   */
411   ierr = PetscSetDisplay();CHKERRQ(ierr);
412 
413 #if defined(PETSC_USE_PTHREAD_CLASSES)
414   /*
415       Determine whether user specified maximum number of threads
416    */
417   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
418 
419   /*
420       Determine whether to use thread pool
421    */
422   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
423   if (flg1) {
424     PetscUseThreadPool = PETSC_TRUE;
425     PetscInt N_CORES = get_nprocs();
426     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
427     char tstr[9];
428     char tbuf[2];
429     strcpy(tstr,"-thread");
430     for(i=0;i<PetscMaxThreads;i++) {
431       ThreadCoreAffinity[i] = i;
432       sprintf(tbuf,"%d",i);
433       strcat(tstr,tbuf);
434       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
435       if(flg1) {
436         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
437         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
438       }
439       tstr[7] = '\0';
440     }
441     /* get the thread pool type */
442     PetscInt ipool = 0;
443     ierr = PetscOptionsGetInt(PETSC_NULL,"-pool",&ipool,PETSC_NULL);CHKERRQ(ierr);
444     switch(ipool) {
445     case 1:
446       PetscThreadFunc       = &PetscThreadFunc_Tree;
447       PetscThreadInitialize = &PetscThreadInitialize_Tree;
448       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
449       MainWait              = &MainWait_Tree;
450       MainJob               = &MainJob_Tree;
451       break;
452     case 2:
453       PetscThreadFunc       = &PetscThreadFunc_Main;
454       PetscThreadInitialize = &PetscThreadInitialize_Main;
455       PetscThreadFinalize   = &PetscThreadFinalize_Main;
456       MainWait              = &MainWait_Main;
457       MainJob               = &MainJob_Main;
458       break;
459 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
460     case 3:
461 #else
462     default:
463 #endif
464       PetscThreadFunc       = &PetscThreadFunc_Chain;
465       PetscThreadInitialize = &PetscThreadInitialize_Chain;
466       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
467       MainWait              = &MainWait_Chain;
468       MainJob               = &MainJob_Chain;
469       break;
470 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
471     default:
472       PetscThreadFunc       = &PetscThreadFunc_True;
473       PetscThreadInitialize = &PetscThreadInitialize_True;
474       PetscThreadFinalize   = &PetscThreadFinalize_True;
475       MainWait              = &MainWait_True;
476       MainJob               = &MainJob_True;
477       break;
478 #endif
479     }
480     PetscThreadInitialize(PetscMaxThreads);
481   }
482 #endif
483 
484   /*
485       Print the PETSc version information
486   */
487   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
488   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
489   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
490   if (flg1 || flg2 || flg3){
491 
492     /*
493        Print "higher-level" package version message
494     */
495     if (PetscExternalVersionFunction) {
496       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
497     }
498 
499     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
500     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
501 ------------------------------\n");CHKERRQ(ierr);
502     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
503     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
504     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
505     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
506     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
507     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
508     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
509 ------------------------------\n");CHKERRQ(ierr);
510   }
511 
512   /*
513        Print "higher-level" package help message
514   */
515   if (flg3){
516     if (PetscExternalHelpFunction) {
517       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
518     }
519   }
520 
521   /*
522       Setup the error handling
523   */
524   flg1 = PETSC_FALSE;
525   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
526   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
527   flg1 = PETSC_FALSE;
528   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
529   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
530   flg1 = PETSC_FALSE;
531   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
532   if (flg1) {
533     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
534   }
535   flg1 = PETSC_FALSE;
536   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
537   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
538   flg1 = PETSC_FALSE;
539   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
540   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
541 
542   /*
543       Setup debugger information
544   */
545   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
546   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
547   if (flg1) {
548     MPI_Errhandler err_handler;
549 
550     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
551     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
552     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
553     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
554   }
555   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
556   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
557   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
558   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
559   if (flg1 || flg2) {
560     PetscMPIInt    size;
561     PetscInt       lsize,*nodes;
562     MPI_Errhandler err_handler;
563     /*
564        we have to make sure that all processors have opened
565        connections to all other processors, otherwise once the
566        debugger has stated it is likely to receive a SIGUSR1
567        and kill the program.
568     */
569     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
570     if (size > 2) {
571       PetscMPIInt dummy = 0;
572       MPI_Status  status;
573       for (i=0; i<size; i++) {
574         if (rank != i) {
575           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
576         }
577       }
578       for (i=0; i<size; i++) {
579         if (rank != i) {
580           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
581         }
582       }
583     }
584     /* check if this processor node should be in debugger */
585     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
586     lsize = size;
587     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
588     if (flag) {
589       for (i=0; i<lsize; i++) {
590         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
591       }
592     }
593     if (!flag) {
594       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
595       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
596       if (flg1) {
597         ierr = PetscAttachDebugger();CHKERRQ(ierr);
598       } else {
599         ierr = PetscStopForDebugger();CHKERRQ(ierr);
600       }
601       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
602       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
603     }
604     ierr = PetscFree(nodes);CHKERRQ(ierr);
605   }
606 
607   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
608   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
609 
610 #if defined(PETSC_USE_SOCKET_VIEWER)
611   /*
612     Activates new sockets for zope if needed
613   */
614   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
615   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
616   if (flgz){
617     int  sockfd;
618     char hostname[256];
619     char username[256];
620     int  remoteport = 9999;
621 
622     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
623     if (!hostname[0]){
624       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
625     }
626     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
627     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
628     PETSC_ZOPEFD = fdopen(sockfd, "w");
629     if (flgzout){
630       PETSC_STDOUT = PETSC_ZOPEFD;
631       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
632       fprintf(PETSC_STDOUT, "<<<start>>>");
633     } else {
634       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
635       fprintf(PETSC_ZOPEFD, "<<<start>>>");
636     }
637   }
638 #endif
639 #if defined(PETSC_USE_SERVER)
640   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
641   if (flgz){
642     PetscInt port = PETSC_DECIDE;
643     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
644     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
645   }
646 #endif
647 
648   /*
649         Setup profiling and logging
650   */
651 #if defined (PETSC_USE_INFO)
652   {
653     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
654     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
655     if (flg1 && logname[0]) {
656       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
657     } else if (flg1) {
658       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
659     }
660   }
661 #endif
662 #if defined(PETSC_USE_LOG)
663   mname[0] = 0;
664   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
665   if (flg1) {
666     if (mname[0]) {
667       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
668     } else {
669       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
670     }
671   }
672 #if defined(PETSC_HAVE_MPE)
673   flg1 = PETSC_FALSE;
674   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
675   if (flg1) PetscLogMPEBegin();
676 #endif
677   flg1 = PETSC_FALSE;
678   flg2 = PETSC_FALSE;
679   flg3 = PETSC_FALSE;
680   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
681   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
682   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
683   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
684   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
685   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
686 
687   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
688   if (flg1) {
689     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
690     FILE *file;
691     if (mname[0]) {
692       sprintf(name,"%s.%d",mname,rank);
693       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
694       file = fopen(fname,"w");
695       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
696     } else {
697       file = PETSC_STDOUT;
698     }
699     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
700   }
701 #endif
702 
703   /*
704       Setup building of stack frames for all function calls
705   */
706 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
707   ierr = PetscStackCreate();CHKERRQ(ierr);
708 #endif
709 
710   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
711 
712   /*
713        Print basic help message
714   */
715   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
716   if (flg1) {
717     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
718     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
719     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
720     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
721     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
722     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
723     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
724     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
725     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
726     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
727     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
728     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
729     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
730     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
731     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
732     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
733     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
734     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
735     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
736     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
737     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
738     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
739     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
741     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
742     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
750 #if defined(PETSC_USE_LOG)
751     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
754 #if defined(PETSC_HAVE_MPE)
755     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
756 #endif
757     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
758 #endif
759     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
763   }
764 
765   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
766   if (flg1) {
767     ierr = PetscSleep(si);CHKERRQ(ierr);
768   }
769 
770   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
771   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
772   if (f) {
773     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
774   }
775 
776 #if defined(PETSC_HAVE_CUSP)
777   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
778   if (flg3) flg1 = PETSC_TRUE;
779   else flg1 = PETSC_FALSE;
780   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
781   if (flg1) synchronizeCUSP = PETSC_TRUE;
782 #endif
783 
784   PetscFunctionReturn(0);
785 }
786 
787 #if defined(PETSC_USE_PTHREAD_CLASSES)
788 
789 /**** 'Tree' Thread Pool Functions ****/
/*
   PetscThreadFunc_Tree - Worker entry point for the 'tree' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this worker's id (ThreadId)

   Returns NULL after the work loop ends, i.e. once PetscThreadGo is false.

   Notes:
   Workers are arranged as a tree with fan-out Mary: the subordinates of
   ThreadId are Mary*ThreadId+1 .. Mary*ThreadId+Mary, restricted to ids below
   PetscMaxThreads.  A worker whose first subordinate id is out of range has
   no subordinates (PeeOn).

   Start-up: wait until every subordinate has set its ready flag, then set the
   own ready flag under the own mutex and signal cond1array[ThreadId] (worker 0
   instead sets job_tree.eJobStat to JobCompleted and signals main_cond).

   Each pass of the work loop is entered with the own mutex held:
     1. wait on cond2array[ThreadId] while the own ready flag is set,
     2. release the mutex and signal every subordinate's cond2,
     3. call job_tree.pfunc with job_tree.pdata itself when that is NULL,
        otherwise with job_tree.pdata[ThreadId]; a nonzero result sets
        ithreaderr to 1,
     4. if PetscThreadGo is still set, wait until each subordinate is both
        ready and started, re-take the own mutex, set the ready flag and
        signal as in the start-up step.
*/
790 void* PetscThreadFunc_Tree(void* arg) {
791   PetscErrorCode iterr;
792   int icorr,ierr;   /* ierr receives pthread return codes but is never examined */
793   int* pId = (int*)arg;
794   int ThreadId = *pId,Mary = 2,i,SubWorker;   /* Mary is the fan-out of the tree */
795   PetscBool PeeOn;   /* PETSC_TRUE when this worker has no subordinates */
796   cpu_set_t mset;
797 
      /* request affinity to the single core listed for this worker; the result of sched_setaffinity() is discarded */
798   icorr = ThreadCoreAffinity[ThreadId];
799   CPU_ZERO(&mset);
800   CPU_SET(icorr,&mset);
801   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
802 
      /* the first subordinate would have id Mary*ThreadId+1; if that id does not exist there are none */
803   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
804     PeeOn = PETSC_TRUE;
805   }
806   else {
807     PeeOn = PETSC_FALSE;
808   }
809   if(PeeOn==PETSC_FALSE) {
810     /* check your subordinates, wait for them to be ready */
811     for(i=1;i<=Mary;i++) {
812       SubWorker = Mary*ThreadId+i;
813       if(SubWorker<PetscMaxThreads) {
814         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
815         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
816           /* upon entry, atomically releases the lock and blocks
817            upon return, has the lock */
818           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
819         }
820         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
821       }
822     }
823     /* your subordinates are now ready */
824   }
      /* this lock is still held when the work loop below is entered */
825   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
826   /* update your ready status */
827   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
828   if(ThreadId==0) {
829     job_tree.eJobStat = JobCompleted;
830     /* signal main */
831     ierr = pthread_cond_signal(&main_cond);
832   }
833   else {
834     /* tell your boss that you're ready to work */
835     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
836   }
837   /* the while loop needs to have an exit
838   the 'main' thread can terminate all the threads by performing a broadcast
839    and calling FuncFinish */
840   while(PetscThreadGo) {
841     /* need to check the condition to ensure we don't have to wait
842       waiting when you don't have to causes problems
843      also need to check the condition to ensure proper handling of spurious wakeups */
        /* NOTE(review): the two flag updates inside this loop run after every return from
           pthread_cond_wait(), so any wakeup ends the loop - confirm that this is intended */
844     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
845       /* upon entry, atomically releases the lock and blocks
846        upon return, has the lock */
847         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
848 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
849 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
850     }
851     if(ThreadId==0) {
852       job_tree.startJob = PETSC_FALSE;
853       job_tree.eJobStat = ThreadsWorking;
854     }
855     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
856     if(PeeOn==PETSC_FALSE) {
857       /* tell your subordinates it's time to get to work */
858       for(i=1; i<=Mary; i++) {
859 	SubWorker = Mary*ThreadId+i;
860         if(SubWorker<PetscMaxThreads) {
861           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
862         }
863       }
864     }
865     /* do your job */
866     if(job_tree.pdata==NULL) {
867       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
868     }
869     else {
870       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
871     }
872     if(iterr!=0) {
873       ithreaderr = 1;
874     }
        /* when the job cleared PetscThreadGo the own mutex is not re-taken and the loop ends */
875     if(PetscThreadGo) {
876       /* reset job, get ready for more */
877       if(PeeOn==PETSC_FALSE) {
878         /* check your subordinates, waiting for them to be ready
879          how do you know for a fact that a given subordinate has actually started? */
880 	for(i=1;i<=Mary;i++) {
881 	  SubWorker = Mary*ThreadId+i;
882           if(SubWorker<PetscMaxThreads) {
883             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
884             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
885               /* upon entry, atomically releases the lock and blocks
886                upon return, has the lock */
887               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
888             }
889             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
890           }
891 	}
892         /* your subordinates are now ready */
893       }
894       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
895       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
896       if(ThreadId==0) {
897 	job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
898         /* root thread signals 'main' */
899         ierr = pthread_cond_signal(&main_cond);
900       }
901       else {
902         /* signal your boss before you go to sleep */
903         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
904       }
905     }
906   }
907   return NULL;
908 }
909 
910 #undef __FUNCT__
911 #define __FUNCT__ "PetscThreadInitialize_Tree"
912 void* PetscThreadInitialize_Tree(PetscInt N) {
913   PetscInt i,ierr;
914   int status;
915 
916   if(PetscUseThreadPool) {
917     size_t Val1 = (size_t)CACHE_LINE_SIZE;
918     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
919     arrmutex = (char*)memalign(Val1,Val2);
920     arrcond1 = (char*)memalign(Val1,Val2);
921     arrcond2 = (char*)memalign(Val1,Val2);
922     arrstart = (char*)memalign(Val1,Val2);
923     arrready = (char*)memalign(Val1,Val2);
924     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
925     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
926     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
927     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
928     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
929     /* initialize job structure */
930     for(i=0; i<PetscMaxThreads; i++) {
931       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
932       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
933       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
934       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
935       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
936     }
937     for(i=0; i<PetscMaxThreads; i++) {
938       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
939       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
940       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
941       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
942       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
943     }
944     job_tree.pfunc = NULL;
945     job_tree.pdata = (void**)malloc(N*sizeof(void*));
946     job_tree.startJob = PETSC_FALSE;
947     job_tree.eJobStat = JobInitiated;
948     pVal = (int*)malloc(N*sizeof(int));
949     /* allocate memory in the heap for the thread structure */
950     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
951     /* create threads */
952     for(i=0; i<N; i++) {
953       pVal[i] = i;
954       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
955       /* should check status */
956     }
957   }
958   return NULL;
959 }
960 
961 #undef __FUNCT__
962 #define __FUNCT__ "PetscThreadFinalize_Tree"
963 PetscErrorCode PetscThreadFinalize_Tree() {
964   int i,ierr;
965   void* jstatus;
966 
967   PetscFunctionBegin;
968 
969   if(PetscUseThreadPool) {
970     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
971     /* join the threads */
972     for(i=0; i<PetscMaxThreads; i++) {
973       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
974       /* do error checking*/
975     }
976     free(PetscThreadPoint);
977     free(arrmutex);
978     free(arrcond1);
979     free(arrcond2);
980     free(arrstart);
981     free(arrready);
982     free(job_tree.pdata);
983     free(pVal);
984   }
985   else {
986   }
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MainWait_Tree"
992 void MainWait_Tree() {
993   int ierr;
994   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
995   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
996     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
997   }
998   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MainJob_Tree"
1003 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1004   int i,ierr;
1005   PetscErrorCode ijoberr = 0;
1006   if(PetscUseThreadPool) {
1007     MainWait();
1008     job_tree.pfunc = pFunc;
1009     job_tree.pdata = data;
1010     job_tree.startJob = PETSC_TRUE;
1011     for(i=0; i<PetscMaxThreads; i++) {
1012       *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1013     }
1014     job_tree.eJobStat = JobInitiated;
1015     ierr = pthread_cond_signal(job_tree.cond2array[0]);
1016     if(pFunc!=FuncFinish) {
1017       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1018     }
1019   }
1020   else {
1021     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1022     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1023     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1024     free(apThread);
1025   }
1026   if(ithreaderr) {
1027     ijoberr = ithreaderr;
1028   }
1029   return ijoberr;
1030 }
1031 /****  ****/
1032 
1033 /**** 'Main' Thread Pool Functions ****/
/*
   PetscThreadFunc_Main - Worker entry point for the 'main' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this worker's id (ThreadId)

   Returns NULL after the work loop ends, i.e. once PetscThreadGo is false.

   Notes:
   Workers in this pool only touch their own mutex, flag and condition
   variables.  After requesting core affinity the worker sets its ready flag
   under its own mutex and signals cond1array[ThreadId].

   Each pass of the work loop is entered with the own mutex held:
     1. wait on cond2array[ThreadId] while the own ready flag is set,
     2. release the mutex,
     3. call job_main.pfunc with job_main.pdata itself when that is NULL,
        otherwise with job_main.pdata[ThreadId]; a nonzero result sets
        ithreaderr to 1,
     4. if PetscThreadGo is still set, re-take the mutex, set the ready flag
        and signal cond1array[ThreadId].
*/
1034 void* PetscThreadFunc_Main(void* arg) {
1035   PetscErrorCode iterr;
1036   int icorr,ierr;   /* ierr receives pthread return codes but is never examined */
1037   int* pId = (int*)arg;
1038   int ThreadId = *pId;
1039   cpu_set_t mset;
1040 
       /* request affinity to the single core listed for this worker; the result of sched_setaffinity() is discarded */
1041   icorr = ThreadCoreAffinity[ThreadId];
1042   CPU_ZERO(&mset);
1043   CPU_SET(icorr,&mset);
1044   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1045 
       /* this lock is still held when the work loop below is entered */
1046   ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1047   /* update your ready status */
1048   *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1049   /* tell the BOSS that you're ready to work before you go to sleep */
1050   ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1051 
1052   /* the while loop needs to have an exit
1053      the 'main' thread can terminate all the threads by performing a broadcast
1054      and calling FuncFinish */
1055   while(PetscThreadGo) {
1056     /* need to check the condition to ensure we don't have to wait
1057        waiting when you don't have to causes problems
1058      also need to check the condition to ensure proper handling of spurious wakeups */
1059     while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
1060       /* upon entry, atomically releases the lock and blocks
1061        upon return, has the lock */
1062         ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
1063 	/* the ready flag is not cleared here; this loop only ends once it has been cleared elsewhere */
1064     }
1065     ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
         /* do your job */
1066     if(job_main.pdata==NULL) {
1067       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
1068     }
1069     else {
1070       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
1071     }
1072     if(iterr!=0) {
1073       ithreaderr = 1;
1074     }
         /* when the job cleared PetscThreadGo the own mutex is not re-taken and the loop ends */
1075     if(PetscThreadGo) {
1076       /* reset job, get ready for more */
1077       ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1078       *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1079       /* tell the BOSS that you're ready to work before you go to sleep */
1080       ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1081     }
1082   }
1083   return NULL;
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "PetscThreadInitialize_Main"
1088 void* PetscThreadInitialize_Main(PetscInt N) {
1089   PetscInt i,ierr;
1090   int status;
1091 
1092   if(PetscUseThreadPool) {
1093     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1094     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1095     arrmutex = (char*)memalign(Val1,Val2);
1096     arrcond1 = (char*)memalign(Val1,Val2);
1097     arrcond2 = (char*)memalign(Val1,Val2);
1098     arrstart = (char*)memalign(Val1,Val2);
1099     arrready = (char*)memalign(Val1,Val2);
1100     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1101     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1102     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1103     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1104     /* initialize job structure */
1105     for(i=0; i<PetscMaxThreads; i++) {
1106       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1107       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1108       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1109       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1110     }
1111     for(i=0; i<PetscMaxThreads; i++) {
1112       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1113       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1114       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1115       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1116     }
1117     job_main.pfunc = NULL;
1118     job_main.pdata = (void**)malloc(N*sizeof(void*));
1119     pVal = (int*)malloc(N*sizeof(int));
1120     /* allocate memory in the heap for the thread structure */
1121     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1122     /* create threads */
1123     for(i=0; i<N; i++) {
1124       pVal[i] = i;
1125       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1126       /* error check */
1127     }
1128   }
1129   else {
1130   }
1131   return NULL;
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "PetscThreadFinalize_Main"
1136 PetscErrorCode PetscThreadFinalize_Main() {
1137   int i,ierr;
1138   void* jstatus;
1139 
1140   PetscFunctionBegin;
1141 
1142   if(PetscUseThreadPool) {
1143     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1144     /* join the threads */
1145     for(i=0; i<PetscMaxThreads; i++) {
1146       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1147     }
1148     free(PetscThreadPoint);
1149     free(arrmutex);
1150     free(arrcond1);
1151     free(arrcond2);
1152     free(arrstart);
1153     free(arrready);
1154     free(job_main.pdata);
1155     free(pVal);
1156   }
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MainWait_Main"
1162 void MainWait_Main() {
1163   int i,ierr;
1164   for(i=0; i<PetscMaxThreads; i++) {
1165     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1166     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1167       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1168     }
1169     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1170   }
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MainJob_Main"
1175 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1176   int i,ierr;
1177   PetscErrorCode ijoberr = 0;
1178   if(PetscUseThreadPool) {
1179     MainWait(); /* you know everyone is waiting to be signalled! */
1180     job_main.pfunc = pFunc;
1181     job_main.pdata = data;
1182     for(i=0; i<PetscMaxThreads; i++) {
1183       *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1184     }
1185     /* tell the threads to go to work */
1186     for(i=0; i<PetscMaxThreads; i++) {
1187       ierr = pthread_cond_signal(job_main.cond2array[i]);
1188     }
1189     if(pFunc!=FuncFinish) {
1190       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1191     }
1192   }
1193   else {
1194     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1195     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1196     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1197     free(apThread);
1198   }
1199   if(ithreaderr) {
1200     ijoberr = ithreaderr;
1201   }
1202   return ijoberr;
1203 }
1204 /****  ****/
1205 
1206 /**** Chain Thread Functions ****/
/*
   PetscThreadFunc_Chain - Worker entry point for the 'chain' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this worker's id (ThreadId)

   Returns NULL after the work loop ends, i.e. once PetscThreadGo is false.

   Notes:
   Workers form a chain: the subordinate of ThreadId is ThreadId+1, and the
   worker with id PetscMaxThreads-1 has none (PeeOn).

   Start-up: wait until the subordinate has set its ready flag, then set the
   own ready flag under the own mutex and signal cond1array[ThreadId] (worker 0
   instead sets job_chain.eJobStat to JobCompleted and signals main_cond).

   Each pass of the work loop is entered with the own mutex held:
     1. wait on cond2array[ThreadId] while the own ready flag is set,
     2. release the mutex and signal the subordinate's cond2,
     3. call job_chain.pfunc with job_chain.pdata itself when that is NULL,
        otherwise with job_chain.pdata[ThreadId]; a nonzero result sets
        ithreaderr to 1,
     4. if PetscThreadGo is still set, wait until the subordinate is both
        ready and started, re-take the own mutex, set the ready flag and
        signal as in the start-up step.
*/
1207 void* PetscThreadFunc_Chain(void* arg) {
1208   PetscErrorCode iterr;
1209   int icorr,ierr;   /* ierr receives pthread return codes but is never examined */
1210   int* pId = (int*)arg;
1211   int ThreadId = *pId;
1212   int SubWorker = ThreadId + 1;   /* next link in the chain */
1213   PetscBool PeeOn;   /* PETSC_TRUE when this worker has no subordinate */
1214   cpu_set_t mset;
1215 
       /* request affinity to the single core listed for this worker; the result of sched_setaffinity() is discarded */
1216   icorr = ThreadCoreAffinity[ThreadId];
1217   CPU_ZERO(&mset);
1218   CPU_SET(icorr,&mset);
1219   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1220 
1221   if(ThreadId==(PetscMaxThreads-1)) {
1222     PeeOn = PETSC_TRUE;
1223   }
1224   else {
1225     PeeOn = PETSC_FALSE;
1226   }
1227   if(PeeOn==PETSC_FALSE) {
1228     /* check your subordinate, wait for him to be ready */
1229     ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1230     while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
1231       /* upon entry, atomically releases the lock and blocks
1232        upon return, has the lock */
1233       ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1234     }
1235     ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1236     /* your subordinate is now ready*/
1237   }
       /* this lock is still held when the work loop below is entered */
1238   ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1239   /* update your ready status */
1240   *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1241   if(ThreadId==0) {
1242     job_chain.eJobStat = JobCompleted;
1243     /* signal main */
1244     ierr = pthread_cond_signal(&main_cond);
1245   }
1246   else {
1247     /* tell your boss that you're ready to work */
1248     ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1249   }
1250   /*  the while loop needs to have an exit
1251      the 'main' thread can terminate all the threads by performing a broadcast
1252    and calling FuncFinish */
1253   while(PetscThreadGo) {
1254     /* need to check the condition to ensure we don't have to wait
1255        waiting when you don't have to causes problems
1256      also need to check the condition to ensure proper handling of spurious wakeups */
         /* NOTE(review): the two flag updates inside this loop run after every return from
            pthread_cond_wait(), so any wakeup ends the loop - confirm that this is intended */
1257     while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
1258       /* upon entry, atomically releases the lock and blocks
1259        upon return, has the lock */
1260         ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
1261 	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
1262 	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
1263     }
1264     if(ThreadId==0) {
1265       job_chain.startJob = PETSC_FALSE;
1266       job_chain.eJobStat = ThreadsWorking;
1267     }
1268     ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
1269     if(PeeOn==PETSC_FALSE) {
1270       /* tell your subworker it's time to get to work */
1271       ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
1272     }
1273     /* do your job */
1274     if(job_chain.pdata==NULL) {
1275       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
1276     }
1277     else {
1278       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
1279     }
1280     if(iterr!=0) {
1281       ithreaderr = 1;
1282     }
         /* when the job cleared PetscThreadGo the own mutex is not re-taken and the loop ends */
1283     if(PetscThreadGo) {
1284       /* reset job, get ready for more */
1285       if(PeeOn==PETSC_FALSE) {
1286         /* check your subordinate, wait for him to be ready
1287          how do you know for a fact that your subordinate has actually started? */
1288         ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1289         while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
1290           /* upon entry, atomically releases the lock and blocks
1291            upon return, has the lock */
1292           ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1293         }
1294         ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1295         /* your subordinate is now ready */
1296       }
1297       ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1298       *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1299       if(ThreadId==0) {
1300 	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
1301         /* root thread (foreman) signals 'main' */
1302         ierr = pthread_cond_signal(&main_cond);
1303       }
1304       else {
1305         /* signal your boss before you go to sleep */
1306         ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1307       }
1308     }
1309   }
1310   return NULL;
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "PetscThreadInitialize_Chain"
1315 void* PetscThreadInitialize_Chain(PetscInt N) {
1316   PetscInt i,ierr;
1317   int status;
1318 
1319   if(PetscUseThreadPool) {
1320     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1321     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1322     arrmutex = (char*)memalign(Val1,Val2);
1323     arrcond1 = (char*)memalign(Val1,Val2);
1324     arrcond2 = (char*)memalign(Val1,Val2);
1325     arrstart = (char*)memalign(Val1,Val2);
1326     arrready = (char*)memalign(Val1,Val2);
1327     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1328     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1329     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1330     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1331     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1332     /* initialize job structure */
1333     for(i=0; i<PetscMaxThreads; i++) {
1334       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1335       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1336       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1337       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1338       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1339     }
1340     for(i=0; i<PetscMaxThreads; i++) {
1341       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1342       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1343       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1344       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1345       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1346     }
1347     job_chain.pfunc = NULL;
1348     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1349     job_chain.startJob = PETSC_FALSE;
1350     job_chain.eJobStat = JobInitiated;
1351     pVal = (int*)malloc(N*sizeof(int));
1352     /* allocate memory in the heap for the thread structure */
1353     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1354     /* create threads */
1355     for(i=0; i<N; i++) {
1356       pVal[i] = i;
1357       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1358       /* should check error */
1359     }
1360   }
1361   else {
1362   }
1363   return NULL;
1364 }
1365 
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "PetscThreadFinalize_Chain"
1369 PetscErrorCode PetscThreadFinalize_Chain() {
1370   int i,ierr;
1371   void* jstatus;
1372 
1373   PetscFunctionBegin;
1374 
1375   if(PetscUseThreadPool) {
1376     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1377     /* join the threads */
1378     for(i=0; i<PetscMaxThreads; i++) {
1379       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1380       /* should check error */
1381     }
1382     free(PetscThreadPoint);
1383     free(arrmutex);
1384     free(arrcond1);
1385     free(arrcond2);
1386     free(arrstart);
1387     free(arrready);
1388     free(job_chain.pdata);
1389     free(pVal);
1390   }
1391   else {
1392   }
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MainWait_Chain"
1398 void MainWait_Chain() {
1399   int ierr;
1400   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1401   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1402     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1403   }
1404   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MainJob_Chain"
1409 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1410   int i,ierr;
1411   PetscErrorCode ijoberr = 0;
1412   if(PetscUseThreadPool) {
1413     MainWait();
1414     job_chain.pfunc = pFunc;
1415     job_chain.pdata = data;
1416     job_chain.startJob = PETSC_TRUE;
1417     for(i=0; i<PetscMaxThreads; i++) {
1418       *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1419     }
1420     job_chain.eJobStat = JobInitiated;
1421     ierr = pthread_cond_signal(job_chain.cond2array[0]);
1422     if(pFunc!=FuncFinish) {
1423       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1424     }
1425   }
1426   else {
1427     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1428     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1429     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1430     free(apThread);
1431   }
1432   if(ithreaderr) {
1433     ijoberr = ithreaderr;
1434   }
1435   return ijoberr;
1436 }
1437 /****  ****/
1438 
1439 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1440 /**** True Thread Functions ****/
/*
   PetscThreadFunc_True - Worker entry point for the 'true' thread pool.

   Input Parameter:
.  arg - pointer to an int holding this worker's id (ThreadId)

   Returns NULL after the work loop ends, i.e. once PetscThreadGo is false.

   Notes:
   All workers share job_true.mutex and job_true.cond.  After requesting core
   affinity the worker takes the mutex, increments job_true.iNumReadyThreads
   and signals main_cond when that count equals PetscMaxThreads.

   Each pass of the work loop is entered with job_true.mutex held:
     1. wait on job_true.cond while startJob is clear and iNumJobThreads is 0,
     2. clear startJob, decrement iNumJobThreads and iNumReadyThreads, and
        derive iVal = PetscMaxThreads - iNumReadyThreads - 1,
     3. release the mutex and call job_true.pfunc with job_true.pdata itself
        when that is NULL, otherwise with job_true.pdata[iVal]; a nonzero
        result sets ithreaderr to 1,
     4. wait at the barrier job_true.pbarr,
     5. if PetscThreadGo is still set, re-take the mutex, increment
        iNumReadyThreads and signal main_cond when all workers are ready.
*/
1441 void* PetscThreadFunc_True(void* arg) {
1442   int icorr,ierr,iVal;   /* ierr receives pthread return codes but is never examined */
1443   int* pId = (int*)arg;
1444   int ThreadId = *pId;
1445   PetscErrorCode iterr;
1446   cpu_set_t mset;
1447 
       /* request affinity to the single core listed for this worker; the result of sched_setaffinity() is discarded */
1448   icorr = ThreadCoreAffinity[ThreadId];
1449   CPU_ZERO(&mset);
1450   CPU_SET(icorr,&mset);
1451   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1452 
       /* this lock is still held when the work loop below is entered */
1453   ierr = pthread_mutex_lock(&job_true.mutex);
1454   job_true.iNumReadyThreads++;
1455   if(job_true.iNumReadyThreads==PetscMaxThreads) {
1456     ierr = pthread_cond_signal(&main_cond);
1457   }
1458   /* the while loop needs to have an exit
1459     the 'main' thread can terminate all the threads by performing a broadcast
1460    and calling FuncFinish */
1461   while(PetscThreadGo) {
1462     /* need to check the condition to ensure we don't have to wait
1463       waiting when you don't have to causes problems
1464      also need to wait if another thread sneaks in and messes with the predicate */
1465     while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
1466       /* upon entry, atomically releases the lock and blocks
1467        upon return, has the lock */
1468       ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
1469     }
         /* claim one share of the job; iVal, derived from the ready count, selects the pdata entry used below */
1470     job_true.startJob = PETSC_FALSE;
1471     job_true.iNumJobThreads--;
1472     job_true.iNumReadyThreads--;
1473     iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
1474     pthread_mutex_unlock(&job_true.mutex);
1475     if(job_true.pdata==NULL) {
1476       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
1477     }
1478     else {
1479       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
1480     }
1481     if(iterr!=0) {
1482       ithreaderr = 1;
1483     }
1484     /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
1485       what happens if a thread finishes before they all start? BAD!
1486      what happens if a thread finishes before any else start? BAD! */
1487     pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
1488     /* reset job */
         /* when the job cleared PetscThreadGo the mutex is not re-taken and the loop ends */
1489     if(PetscThreadGo) {
1490       pthread_mutex_lock(&job_true.mutex);
1491       job_true.iNumReadyThreads++;
1492       if(job_true.iNumReadyThreads==PetscMaxThreads) {
1493 	/* signal the 'main' thread that the job is done! (only done once) */
1494 	ierr = pthread_cond_signal(&main_cond);
1495       }
1496     }
1497   }
1498   return NULL;
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "PetscThreadInitialize_True"
1503 void* PetscThreadInitialize_True(PetscInt N) {
1504   PetscInt i;
1505   int status;
1506 
1507   if(PetscUseThreadPool) {
1508     pVal = (int*)malloc(N*sizeof(int));
1509     /* allocate memory in the heap for the thread structure */
1510     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1511     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1512     job_true.pdata = (void**)malloc(N*sizeof(void*));
1513     for(i=0; i<N; i++) {
1514       pVal[i] = i;
1515       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1516       /* error check to ensure proper thread creation */
1517       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1518       /* should check error */
1519     }
1520   }
1521   else {
1522   }
1523   return NULL;
1524 }
1525 
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "PetscThreadFinalize_True"
1529 PetscErrorCode PetscThreadFinalize_True() {
1530   int i,ierr;
1531   void* jstatus;
1532 
1533   PetscFunctionBegin;
1534 
1535   if(PetscUseThreadPool) {
1536     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1537     /* join the threads */
1538     for(i=0; i<PetscMaxThreads; i++) {
1539       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1540       /* should check error */
1541     }
1542     free(BarrPoint);
1543     free(PetscThreadPoint);
1544   }
1545   else {
1546   }
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MainWait_True"
1552 void MainWait_True() {
1553   int ierr;
1554   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1555     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1556   }
1557   ierr = pthread_mutex_unlock(&job_true.mutex);
1558 }
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MainJob_True"
1562 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1563   int ierr;
1564   PetscErrorCode ijoberr = 0;
1565   if(PetscUseThreadPool) {
1566     MainWait();
1567     job_true.pfunc = pFunc;
1568     job_true.pdata = data;
1569     job_true.pbarr = &BarrPoint[n];
1570     job_true.iNumJobThreads = n;
1571     job_true.startJob = PETSC_TRUE;
1572     ierr = pthread_cond_broadcast(&job_true.cond);
1573     if(pFunc!=FuncFinish) {
1574       MainWait(); /* why wait after? guarantees that job gets done */
1575     }
1576   }
1577   else {
1578     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1579     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1580     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1581     free(apThread);
1582   }
1583   if(ithreaderr) {
1584     ijoberr = ithreaderr;
1585   }
1586   return ijoberr;
1587 }
1588 /****  ****/
1589 #endif
1590 
1591 void* FuncFinish(void* arg) {
1592   PetscThreadGo = PETSC_FALSE;
1593   return(0);
1594 }
1595 
1596 #endif
1597