xref: /petsc/src/sys/objects/init.c (revision b154f58a601ff385df9942fba2a6498e829a8e92)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #define _GNU_SOURCE
10 #include <sched.h>
11 #include <petscsys.h>        /*I  "petscsys.h"   I*/
12 #if defined(PETSC_USE_PTHREAD)
13 #include <pthread.h>
14 #endif
15 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
16 #include <sys/sysinfo.h>
17 #endif
18 #include <unistd.h>
19 #if defined(PETSC_HAVE_STDLIB_H)
20 #include <stdlib.h>
21 #endif
22 #if defined(PETSC_HAVE_MALLOC_H)
23 #include <malloc.h>
24 #endif
25 #if defined(PETSC_HAVE_VALGRIND)
26 #include <valgrind/valgrind.h>
27 #endif
28 
29 /* ------------------------Nasty global variables -------------------------------*/
30 /*
31      Indicates if PETSc started up MPI, or it was
32    already started before PETSc was initialized.
33 */
PetscBool    PetscBeganMPI         = PETSC_FALSE;  /* starts false; described by the comment above, assigned outside this part of the file */
PetscBool    PetscInitializeCalled = PETSC_FALSE;  /* starts false; NOTE(review): presumably set by PetscInitialize() -- confirm */
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;  /* starts false; NOTE(review): presumably set by PetscFinalize() -- confirm */
PetscBool    PetscUseThreadPool    = PETSC_FALSE;  /* set to PETSC_TRUE by PetscOptionsCheckInitial_Private() when -use_thread_pool is given */
PetscBool    PetscThreadGo         = PETSC_TRUE;   /* starts true; NOTE(review): looks like a keep-running flag for worker threads -- confirm */
PetscMPIInt  PetscGlobalRank = -1;                 /* -1 until assigned; the assignment is not in this part of the file */
PetscMPIInt  PetscGlobalSize = -1;                 /* -1 until assigned; the assignment is not in this part of the file */
41 
42 #if defined(PETSC_USE_PTHREAD_CLASSES)
PetscMPIInt  PetscMaxThreads = 2;   /* default thread count; overwritten from the -thread_max option in PetscOptionsCheckInitial_Private() */
pthread_t*   PetscThreadPoint;      /* NOTE(review): presumably the array of worker thread handles -- allocated outside this part of the file, confirm */
/* PETSC_HAVE_PTHREAD_BARRIER is forced on here, so the #if defined(PETSC_HAVE_PTHREAD_BARRIER)
   tests that follow in this file always take the barrier branch. */
#define PETSC_HAVE_PTHREAD_BARRIER
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode ithreaderr = 0;
int*         pVal;

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
/* One core index per thread: PetscOptionsCheckInitial_Private() allocates this array when
   -use_thread_pool is given, defaults entry i to i, and replaces it with the value of the
   -thread<i> option (taken modulo the core count) when that option is present. */
int* ThreadCoreAffinity;

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */
56 
/*
   Job descriptor for the 'tree' thread pool (inferred from the type name and the *_Tree
   routines declared later in this file; the code that fills it in is not in this part of
   the file). NOTE(review): the pointer-to-pointer members presumably hold one entry per
   thread -- confirm against the pool initialization code.
*/
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);          /* thread-style entry point: takes and returns void* */
  void** pdata;                   /* NOTE(review): looks like the per-thread argument handed to pfunc -- confirm */
  PetscBool startJob;
  estat eJobStat;                 /* one of JobInitiated, ThreadsWorking, JobCompleted */
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;               /* the single file-scope instance */
/*
   Job descriptor for the 'main' thread pool (inferred from the type name and the *_Main
   routines declared later in this file). Compared with sjob_tree and sjob_chain it has no
   startJob, eJobStat or arrThreadStarted members.
*/
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);          /* thread-style entry point: takes and returns void* */
  void** pdata;                   /* NOTE(review): looks like the per-thread argument handed to pfunc -- confirm */
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;               /* the single file-scope instance */
/*
   Job descriptor for the 'chain' thread pool (inferred from the type name and the *_Chain
   routines declared later in this file). Its members are declared identically to those of
   sjob_tree.
*/
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);          /* thread-style entry point: takes and returns void* */
  void** pdata;                   /* NOTE(review): looks like the per-thread argument handed to pfunc -- confirm */
  PetscBool startJob;
  estat eJobStat;                 /* one of JobInitiated, ThreadsWorking, JobCompleted */
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;             /* the single file-scope instance */
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/*
   Job descriptor for the 'true' thread pool (inferred from the type name and the *_True
   routines declared later in this file). Unlike the other descriptors it stores one mutex
   and one condition variable by value, plus a pointer to a pthread barrier.
*/
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);          /* thread-style entry point: takes and returns void* */
  void** pdata;                   /* NOTE(review): looks like the per-thread argument handed to pfunc -- confirm */
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
/* Positional initializer: the values are listed in member order, so keep the two in sync. */
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif
103 
pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
/* Raw char buffers. NOTE(review): presumably backing storage for the mutex, condition and
   flag arrays referenced by the job descriptors -- their allocation is not visible in this
   part of the file, confirm before relying on it. */
char* arrmutex; /* used by 'chain','main','tree' thread pools */
char* arrcond1; /* used by 'chain','main','tree' thread pools */
char* arrcond2; /* used by 'chain','main','tree' thread pools */
char* arrstart; /* used by 'chain','main','tree' thread pools */
char* arrready; /* used by 'chain','main','tree' thread pools */
110 
/* Function Pointers */
/*
   Dispatch table for the active thread pool. All five pointers start as NULL.
   PetscOptionsCheckInitial_Private() points them at the _Tree, _Main, _Chain or _True
   routines according to the value of -use_thread_pool; when that option is absent it
   assigns only MainJob (to MainJob_Spawn) and leaves the others NULL.
*/
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** Spawn: the MainJob used when no thread pool is requested ****/
PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);

/* Helpers defined outside this part of the file; only their prototypes are visible here. */
void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
147 #endif
148 
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
/* C++-only syntax: this branch can only be enabled when the file is compiled as C++. */
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
/* Defined here only when PETSC_HAVE_MPI_C_DOUBLE_COMPLEX is not set; no value is assigned in this part of the file. */
MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
MPI_Datatype   MPI_C_COMPLEX;
#endif
PetscScalar    PETSC_i;        /* complex build: no initializer here; NOTE(review): presumably assigned the imaginary unit during initialization -- confirm */
#else
PetscScalar    PETSC_i = 0.0;  /* real build: fixed at zero */
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype   MPIU___FLOAT128 = 0;
#endif
/* Both start as 0; the code that gives them real MPI datatypes is not in this part of the file. */
MPI_Datatype   MPIU_2SCALAR = 0;
MPI_Datatype   MPIU_2INT = 0;
166 
167 /*
168      These are needed by petscbt.h
169 */
170 #include <petscbt.h>
/* NOTE(review): presumably scratch variables used by the bit-array macros in petscbt.h -- confirm against that header. */
char      _BT_mask = ' ';
char      _BT_c = ' ';
PetscInt  _BT_idx  = 0;
174 
175 /*
176        Function that is called to display all error messages
177 */
/* Replaceable print hooks: each global is a function pointer that starts out at the default implementation. */
PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
/* When PETSC_HAVE_MATLAB_ENGINE is set, PetscVFPrintf starts out as PetscVFPrintf_Matlab instead of the default. */
#if defined(PETSC_HAVE_MATLAB_ENGINE)
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
#else
PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
#endif
/*
  Switch for turning CUSP synchronization on or off; off by default */
PetscBool   synchronizeCUSP = PETSC_FALSE;
188 
189 /* ------------------------------------------------------------------------------*/
190 /*
191    Optional file where all PETSc output from various prints is saved
192 */
FILE *petsc_history = PETSC_NULL;  /* null until PetscOptionsCheckInitial_Private() passes it to PetscOpenHistoryFile() for the -history option */
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "PetscOpenHistoryFile"
197 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
198 {
199   PetscErrorCode ierr;
200   PetscMPIInt    rank,size;
201   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
202   char           version[256];
203 
204   PetscFunctionBegin;
205   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
206   if (!rank) {
207     char        arch[10];
208     int         err;
209     PetscViewer viewer;
210 
211     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
212     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
213     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
214     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
215     if (filename) {
216       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
217     } else {
218       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
219       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
220       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
221     }
222 
223     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
224     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
225     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
226     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
227     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
228     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
229     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
230     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
231     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
232     err = fflush(*fd);
233     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "PetscCloseHistoryFile"
240 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
241 {
242   PetscErrorCode ierr;
243   PetscMPIInt    rank;
244   char           date[64];
245   int            err;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
249   if (!rank) {
250     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
251     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
252     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
253     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
254     err = fflush(*fd);
255     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
256     err = fclose(*fd);
257     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 /* ------------------------------------------------------------------------------*/
263 
264 /*
265    This is ugly and probably belongs somewhere else, but I want to
266   be able to put a true MPI abort error handler with command line args.
267 
268     This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits thus providing no useful information
270   in the debugger hence we call abort() instead of MPI_Abort().
271 */
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "Petsc_MPI_AbortOnError"
275 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
276 {
277   PetscFunctionBegin;
278   (*PetscErrorPrintf)("MPI error %d\n",*flag);
279   abort();
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
284 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
285 {
286   PetscErrorCode ierr;
287 
288   PetscFunctionBegin;
289   (*PetscErrorPrintf)("MPI error %d\n",*flag);
290   ierr = PetscAttachDebugger();
291   if (ierr) { /* hopeless so get out */
292     MPI_Abort(*comm,*flag);
293   }
294 }
295 
296 #undef __FUNCT__
297 #define __FUNCT__ "PetscEnd"
298 /*@C
299    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
300      wishes a clean exit somewhere deep in the program.
301 
302    Collective on PETSC_COMM_WORLD
303 
304    Options Database Keys are the same as for PetscFinalize()
305 
306    Level: advanced
307 
308    Note:
309    See PetscInitialize() for more general runtime options.
310 
311 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
312 @*/
313 PetscErrorCode  PetscEnd(void)
314 {
315   PetscFunctionBegin;
316   PetscFinalize();
317   exit(0);
318   return 0;
319 }
320 
PetscBool    PetscOptionsPublish = PETSC_FALSE;   /* loaded from the -options_gui option in PetscOptionsCheckInitial_Private() */
extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
extern PetscBool  petscsetmallocvisited;
/* Receives the -on_error_emacs value (read with a length limit of 128, although the buffer
   holds 256) and is passed to PetscEmacsClientErrorHandler as its context on rank 0. */
static char       emacsmachinename[256];

/* Optional hooks installed by PetscSetHelpVersionFunctions(); PetscOptionsCheckInitial_Private()
   calls each one only when it is non-null. */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "PetscSetHelpVersionFunctions"
331 /*@C
332    PetscSetHelpVersionFunctions - Sets functions that print help and version information
333    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
334    This routine enables a "higher-level" package that uses PETSc to print its messages first.
335 
336    Input Parameter:
337 +  help - the help function (may be PETSC_NULL)
338 -  version - the version function (may be PETSC_NULL)
339 
340    Level: developer
341 
342    Concepts: package help message
343 
344 @*/
345 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
346 {
347   PetscFunctionBegin;
348   PetscExternalHelpFunction    = help;
349   PetscExternalVersionFunction = version;
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
355 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
356 {
357   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
358   MPI_Comm       comm = PETSC_COMM_WORLD;
359   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
360   PetscErrorCode ierr;
361   PetscReal      si;
362   int            i;
363   PetscMPIInt    rank;
364   char           version[256];
365 
366   PetscFunctionBegin;
367   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
368 
369   /*
370       Setup the memory management; support for tracing malloc() usage
371   */
372   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
373 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
374   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
375   if ((!flg2 || flg1) && !petscsetmallocvisited) {
376 #if defined(PETSC_HAVE_VALGRIND)
377     if (flg2 || !(RUNNING_ON_VALGRIND)) {
378       /* turn off default -malloc if valgrind is being used */
379 #endif
380       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
381 #if defined(PETSC_HAVE_VALGRIND)
382     }
383 #endif
384   }
385 #else
386   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
387   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
388   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
389 #endif
390   if (flg3) {
391     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
392   }
393   flg1 = PETSC_FALSE;
394   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
395   if (flg1) {
396     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
397     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
398   }
399 
400   flg1 = PETSC_FALSE;
401   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
402   if (!flg1) {
403     flg1 = PETSC_FALSE;
404     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
405   }
406   if (flg1) {
407     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
408   }
409 
410   /*
411       Set the display variable for graphics
412   */
413   ierr = PetscSetDisplay();CHKERRQ(ierr);
414 
415 
416   /*
417       Print the PETSc version information
418   */
419   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
420   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
421   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
422   if (flg1 || flg2 || flg3){
423 
424     /*
425        Print "higher-level" package version message
426     */
427     if (PetscExternalVersionFunction) {
428       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
429     }
430 
431     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
432     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
433 ------------------------------\n");CHKERRQ(ierr);
434     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
435     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
436     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
437     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
441 ------------------------------\n");CHKERRQ(ierr);
442   }
443 
444   /*
445        Print "higher-level" package help message
446   */
447   if (flg3){
448     if (PetscExternalHelpFunction) {
449       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
450     }
451   }
452 
453   /*
454       Setup the error handling
455   */
456   flg1 = PETSC_FALSE;
457   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
458   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
459   flg1 = PETSC_FALSE;
460   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
461   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
462   flg1 = PETSC_FALSE;
463   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
464   if (flg1) {
465     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
466   }
467   flg1 = PETSC_FALSE;
468   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
469   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
470   flg1 = PETSC_FALSE;
471   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
472   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
473 
474   /*
475       Setup debugger information
476   */
477   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
478   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
479   if (flg1) {
480     MPI_Errhandler err_handler;
481 
482     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
483     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
484     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
485     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
486   }
487   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
488   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
489   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
490   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
491   if (flg1 || flg2) {
492     PetscMPIInt    size;
493     PetscInt       lsize,*nodes;
494     MPI_Errhandler err_handler;
495     /*
496        we have to make sure that all processors have opened
497        connections to all other processors, otherwise once the
498        debugger has stated it is likely to receive a SIGUSR1
499        and kill the program.
500     */
501     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
502     if (size > 2) {
503       PetscMPIInt dummy = 0;
504       MPI_Status  status;
505       for (i=0; i<size; i++) {
506         if (rank != i) {
507           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
508         }
509       }
510       for (i=0; i<size; i++) {
511         if (rank != i) {
512           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
513         }
514       }
515     }
516     /* check if this processor node should be in debugger */
517     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
518     lsize = size;
519     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
520     if (flag) {
521       for (i=0; i<lsize; i++) {
522         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
523       }
524     }
525     if (!flag) {
526       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
527       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
528       if (flg1) {
529         ierr = PetscAttachDebugger();CHKERRQ(ierr);
530       } else {
531         ierr = PetscStopForDebugger();CHKERRQ(ierr);
532       }
533       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
534       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
535     }
536     ierr = PetscFree(nodes);CHKERRQ(ierr);
537   }
538 
539   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
540   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
541 
542 #if defined(PETSC_USE_SOCKET_VIEWER)
543   /*
544     Activates new sockets for zope if needed
545   */
546   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
547   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
548   if (flgz){
549     int  sockfd;
550     char hostname[256];
551     char username[256];
552     int  remoteport = 9999;
553 
554     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
555     if (!hostname[0]){
556       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
557     }
558     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
559     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
560     PETSC_ZOPEFD = fdopen(sockfd, "w");
561     if (flgzout){
562       PETSC_STDOUT = PETSC_ZOPEFD;
563       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
564       fprintf(PETSC_STDOUT, "<<<start>>>");
565     } else {
566       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
567       fprintf(PETSC_ZOPEFD, "<<<start>>>");
568     }
569   }
570 #endif
571 #if defined(PETSC_USE_SERVER)
572   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
573   if (flgz){
574     PetscInt port = PETSC_DECIDE;
575     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
576     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
577   }
578 #endif
579 
580   /*
581         Setup profiling and logging
582   */
583 #if defined (PETSC_USE_INFO)
584   {
585     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
586     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
587     if (flg1 && logname[0]) {
588       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
589     } else if (flg1) {
590       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
591     }
592   }
593 #endif
594 #if defined(PETSC_USE_LOG)
595   mname[0] = 0;
596   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
597   if (flg1) {
598     if (mname[0]) {
599       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
600     } else {
601       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
602     }
603   }
604 #if defined(PETSC_HAVE_MPE)
605   flg1 = PETSC_FALSE;
606   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
607   if (flg1) PetscLogMPEBegin();
608 #endif
609   flg1 = PETSC_FALSE;
610   flg2 = PETSC_FALSE;
611   flg3 = PETSC_FALSE;
612   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
613   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
614   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
615   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
616   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
617   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
618 
619   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
620   if (flg1) {
621     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
622     FILE *file;
623     if (mname[0]) {
624       sprintf(name,"%s.%d",mname,rank);
625       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
626       file = fopen(fname,"w");
627       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
628     } else {
629       file = PETSC_STDOUT;
630     }
631     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
632   }
633 #endif
634 
635   /*
636       Setup building of stack frames for all function calls
637   */
638 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
639   ierr = PetscStackCreate();CHKERRQ(ierr);
640 #endif
641 
642   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
643 
644 #if defined(PETSC_USE_PTHREAD_CLASSES)
645   /*
646       Determine whether user specified maximum number of threads
647    */
648   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
649 
650   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
651   if(flg1) {
652     cpu_set_t mset;
653     int icorr,ncorr = get_nprocs();
654     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
655     CPU_ZERO(&mset);
656     CPU_SET(icorr%ncorr,&mset);
657     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
658   }
659 
660   /*
661       Determine whether to use thread pool
662    */
663   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
664   if (flg1) {
665     PetscUseThreadPool = PETSC_TRUE;
666     PetscInt N_CORES = get_nprocs();
667     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
668     char tstr[9];
669     char tbuf[2];
670     strcpy(tstr,"-thread");
671     for(i=0;i<PetscMaxThreads;i++) {
672       ThreadCoreAffinity[i] = i;
673       sprintf(tbuf,"%d",i);
674       strcat(tstr,tbuf);
675       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
676       if(flg1) {
677         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
678         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
679       }
680       tstr[7] = '\0';
681     }
682     /* get the thread pool type */
683     PetscInt ipool = 0;
684     const char *choices[4] = {"true","tree","main","chain"};
685 
686     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
687     switch(ipool) {
688     case 1:
689       PetscThreadFunc       = &PetscThreadFunc_Tree;
690       PetscThreadInitialize = &PetscThreadInitialize_Tree;
691       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
692       MainWait              = &MainWait_Tree;
693       MainJob               = &MainJob_Tree;
694       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
695       break;
696     case 2:
697       PetscThreadFunc       = &PetscThreadFunc_Main;
698       PetscThreadInitialize = &PetscThreadInitialize_Main;
699       PetscThreadFinalize   = &PetscThreadFinalize_Main;
700       MainWait              = &MainWait_Main;
701       MainJob               = &MainJob_Main;
702       PetscInfo(PETSC_NULL,"Using main thread pool\n");
703       break;
704 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
705     case 3:
706 #else
707     default:
708 #endif
709       PetscThreadFunc       = &PetscThreadFunc_Chain;
710       PetscThreadInitialize = &PetscThreadInitialize_Chain;
711       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
712       MainWait              = &MainWait_Chain;
713       MainJob               = &MainJob_Chain;
714       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
715       break;
716 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
717     default:
718       PetscThreadFunc       = &PetscThreadFunc_True;
719       PetscThreadInitialize = &PetscThreadInitialize_True;
720       PetscThreadFinalize   = &PetscThreadFinalize_True;
721       MainWait              = &MainWait_True;
722       MainJob               = &MainJob_True;
723       PetscInfo(PETSC_NULL,"Using true thread pool\n");
724       break;
725 #endif
726     }
727     PetscThreadInitialize(PetscMaxThreads);
728   } else {
729     //need to define these in the case on 'no threads' or 'thread create/destroy'
730     //could take any of the above versions
731     MainJob               = &MainJob_Spawn;
732   }
733 #endif
734   /*
735        Print basic help message
736   */
737   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
738   if (flg1) {
739     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
741     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
742     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
773 #if defined(PETSC_USE_LOG)
774     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
777 #if defined(PETSC_HAVE_MPE)
778     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
779 #endif
780     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
781 #endif
782     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
783     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
784     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
785     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
786   }
787 
788   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
789   if (flg1) {
790     ierr = PetscSleep(si);CHKERRQ(ierr);
791   }
792 
793   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
794   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
795   if (f) {
796     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
797   }
798 
799 #if defined(PETSC_HAVE_CUSP)
800   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
801   if (flg3) flg1 = PETSC_TRUE;
802   else flg1 = PETSC_FALSE;
803   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
804   if (flg1) synchronizeCUSP = PETSC_TRUE;
805 #endif
806 
807   PetscFunctionReturn(0);
808 }
809 
810 #if defined(PETSC_USE_PTHREAD_CLASSES)
811 
812 /**** 'Tree' Thread Pool Functions ****/
/*
   PetscThreadFunc_Tree - Entry routine executed by every worker of the 'tree' thread pool.

   Input Parameter:
.  arg - points to an int holding this worker's id; it is read once into ThreadId

   Workers form an implicit tree with fan-out Mary (2): the subordinates of worker t are
   workers Mary*t+1 .. Mary*t+Mary, limited to ids below PetscMaxThreads.  Worker 0 is the
   root and is the only worker that signals main_cond.  Each worker first waits until its
   subordinates report ready, reports ready itself, and then loops: sleep on its own
   cond2 entry until woken, wake its subordinates, run job_tree.pfunc, wait for the
   subordinates to report ready again, and report ready itself.  The loop ends once
   PetscThreadGo is false.

   Always returns NULL; a nonzero result from the job function is recorded by setting
   the global ithreaderr to 1.  Return values of the pthread calls are stored in ierr
   but never examined.
*/
813 void* PetscThreadFunc_Tree(void* arg) {
814   PetscErrorCode iterr;
815   int icorr,ierr;
816   int* pId = (int*)arg;
817   int ThreadId = *pId,Mary = 2,i,SubWorker;
818   PetscBool PeeOn;
819   cpu_set_t mset;
820   //printf("Thread %d In Tree Thread Function\n",ThreadId);
  /* restrict the calling thread to the single core listed for this worker */
821   icorr = ThreadCoreAffinity[ThreadId];
822   CPU_ZERO(&mset);
823   CPU_SET(icorr,&mset);
824   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
825 
  /* PeeOn is true when even the first subordinate id is out of range, i.e. this worker is a leaf */
826   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
827     PeeOn = PETSC_TRUE;
828   }
829   else {
830     PeeOn = PETSC_FALSE;
831   }
832   if(PeeOn==PETSC_FALSE) {
833     /* check your subordinates, wait for them to be ready */
834     for(i=1;i<=Mary;i++) {
835       SubWorker = Mary*ThreadId+i;
836       if(SubWorker<PetscMaxThreads) {
837         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
838         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
839           /* upon entry, atomically releases the lock and blocks
840            upon return, has the lock */
841           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
842         }
843         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
844       }
845     }
846     /* your subordinates are now ready */
847   }
  /* this lock stays held until the pthread_cond_wait() inside the loop below releases it */
848   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
849   /* update your ready status */
850   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
851   if(ThreadId==0) {
852     job_tree.eJobStat = JobCompleted;
853     /* signal main */
854     ierr = pthread_cond_signal(&main_cond);
855   }
856   else {
857     /* tell your boss that you're ready to work */
858     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
859   }
860   /* the while loop needs to have an exit
861   the 'main' thread can terminate all the threads by performing a broadcast
862    and calling FuncFinish */
863   while(PetscThreadGo) {
864     /*need to check the condition to ensure we don't have to wait
865       waiting when you don't have to causes problems
866      also need to check the condition to ensure proper handling of spurious wakeups */
867     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
868       /* upon entry, atomically releases the lock and blocks
869        upon return, has the lock */
870         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
        /* NOTE(review): the flags are flipped after every return from the wait, so any
           wakeup (including a spurious one) ends this inner loop - confirm this is intended */
871 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
872 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
873     }
874     if(ThreadId==0) {
875       job_tree.startJob = PETSC_FALSE;
876       job_tree.eJobStat = ThreadsWorking;
877     }
878     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
879     if(PeeOn==PETSC_FALSE) {
880       /* tell your subordinates it's time to get to work */
881       for(i=1; i<=Mary; i++) {
882 	SubWorker = Mary*ThreadId+i;
883         if(SubWorker<PetscMaxThreads) {
884           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
885         }
886       }
887     }
888     /* do your job */
    /* with no data array the job function receives NULL, otherwise this worker's own entry */
889     if(job_tree.pdata==NULL) {
890       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
891     }
892     else {
893       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
894     }
895     if(iterr!=0) {
896       ithreaderr = 1;
897     }
    /* PetscThreadGo is re-read here so that no re-arming is done once it has become false */
898     if(PetscThreadGo) {
899       /* reset job, get ready for more */
900       if(PeeOn==PETSC_FALSE) {
901         /* check your subordinates, waiting for them to be ready
902          how do you know for a fact that a given subordinate has actually started? */
903 	for(i=1;i<=Mary;i++) {
904 	  SubWorker = Mary*ThreadId+i;
905           if(SubWorker<PetscMaxThreads) {
906             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
907             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
908               /* upon entry, atomically releases the lock and blocks
909                upon return, has the lock */
910               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
911             }
912             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
913           }
914 	}
915         /* your subordinates are now ready */
916       }
      /* re-acquired here and deliberately left locked for the wait at the top of the loop */
917       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
918       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
919       if(ThreadId==0) {
920 	job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
921         /* root thread signals 'main' */
922         ierr = pthread_cond_signal(&main_cond);
923       }
924       else {
925         /* signal your boss before you go to sleep */
926         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
927       }
928     }
929   }
930   return NULL;
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "PetscThreadInitialize_Tree"
935 void* PetscThreadInitialize_Tree(PetscInt N) {
936   PetscInt i,ierr;
937   int status;
938 
939   if(PetscUseThreadPool) {
940     size_t Val1 = (size_t)CACHE_LINE_SIZE;
941     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
942     arrmutex = (char*)memalign(Val1,Val2);
943     arrcond1 = (char*)memalign(Val1,Val2);
944     arrcond2 = (char*)memalign(Val1,Val2);
945     arrstart = (char*)memalign(Val1,Val2);
946     arrready = (char*)memalign(Val1,Val2);
947     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
948     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
949     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
950     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
951     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
952     /* initialize job structure */
953     for(i=0; i<PetscMaxThreads; i++) {
954       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
955       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
956       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
957       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
958       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
959     }
960     for(i=0; i<PetscMaxThreads; i++) {
961       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
962       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
963       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
964       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
965       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
966     }
967     job_tree.pfunc = NULL;
968     job_tree.pdata = (void**)malloc(N*sizeof(void*));
969     job_tree.startJob = PETSC_FALSE;
970     job_tree.eJobStat = JobInitiated;
971     pVal = (int*)malloc(N*sizeof(int));
972     /* allocate memory in the heap for the thread structure */
973     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
974     /* create threads */
975     for(i=0; i<N; i++) {
976       pVal[i] = i;
977       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
978       /* should check status */
979     }
980   }
981   return NULL;
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "PetscThreadFinalize_Tree"
986 PetscErrorCode PetscThreadFinalize_Tree() {
987   int i,ierr;
988   void* jstatus;
989 
990   PetscFunctionBegin;
991 
992   if(PetscUseThreadPool) {
993     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
994     /* join the threads */
995     for(i=0; i<PetscMaxThreads; i++) {
996       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
997       /* do error checking*/
998     }
999     free(PetscThreadPoint);
1000     free(arrmutex);
1001     free(arrcond1);
1002     free(arrcond2);
1003     free(arrstart);
1004     free(arrready);
1005     free(job_tree.pdata);
1006     free(pVal);
1007   }
1008   else {
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MainWait_Tree"
1015 void MainWait_Tree() {
1016   int ierr;
1017   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1018   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1019     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1020   }
1021   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MainJob_Tree"
1026 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1027   int i,ierr;
1028   PetscErrorCode ijoberr = 0;
1029   if(PetscUseThreadPool) {
1030     MainWait();
1031     job_tree.pfunc = pFunc;
1032     job_tree.pdata = data;
1033     job_tree.startJob = PETSC_TRUE;
1034     for(i=0; i<PetscMaxThreads; i++) {
1035       *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1036     }
1037     job_tree.eJobStat = JobInitiated;
1038     ierr = pthread_cond_signal(job_tree.cond2array[0]);
1039     if(pFunc!=FuncFinish) {
1040       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1041     }
1042   }
1043   else {
1044     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1045     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1046     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1047     free(apThread);
1048   }
1049   if(ithreaderr) {
1050     ijoberr = ithreaderr;
1051   }
1052   return ijoberr;
1053 }
1054 /****  ****/
1055 
1056 /**** 'Main' Thread Pool Functions ****/
/*
   PetscThreadFunc_Main - Entry routine executed by every worker of the 'main' thread pool.

   Input Parameter:
.  arg - points to an int holding this worker's id; it is read once into ThreadId

   Each worker marks itself ready and signals its own cond1 entry, then loops: wait on
   its cond2 entry while its ready flag is still true, run job_main.pfunc, and mark
   itself ready again.  The loop ends once PetscThreadGo is false.

   Always returns NULL; a nonzero result from the job function is recorded by setting
   the global ithreaderr to 1.  Return values of the pthread calls are stored in ierr
   but never examined.
*/
1057 void* PetscThreadFunc_Main(void* arg) {
1058   PetscErrorCode iterr;
1059   int icorr,ierr;
1060   int* pId = (int*)arg;
1061   int ThreadId = *pId;
1062   cpu_set_t mset;
1063   //printf("Thread %d In Main Thread Function\n",ThreadId);
  /* restrict the calling thread to the single core listed for this worker */
1064   icorr = ThreadCoreAffinity[ThreadId];
1065   CPU_ZERO(&mset);
1066   CPU_SET(icorr,&mset);
1067   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1068 
  /* this lock stays held until the pthread_cond_wait() inside the loop below releases it */
1069   ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1070   /* update your ready status */
1071   *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1072   /* tell the BOSS that you're ready to work before you go to sleep */
1073   ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1074 
1075   /* the while loop needs to have an exit
1076      the 'main' thread can terminate all the threads by performing a broadcast
1077      and calling FuncFinish */
1078   while(PetscThreadGo) {
1079     /* need to check the condition to ensure we don't have to wait
1080        waiting when you don't have to causes problems
1081      also need to check the condition to ensure proper handling of spurious wakeups */
    /* the ready flag is cleared by MainJob_Main, not by this worker */
1082     while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
1083       /* upon entry, atomically releases the lock and blocks
1084        upon return, has the lock */
1085         ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
1086 	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
1087     }
1088     ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* with no data array the job function receives NULL, otherwise this worker's own entry */
1089     if(job_main.pdata==NULL) {
1090       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
1091     }
1092     else {
1093       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
1094     }
1095     if(iterr!=0) {
1096       ithreaderr = 1;
1097     }
    /* PetscThreadGo is re-read here so that no re-arming is done once it has become false */
1098     if(PetscThreadGo) {
1099       /* reset job, get ready for more */
      /* re-acquired here and deliberately left locked for the wait at the top of the loop */
1100       ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1101       *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1102       /* tell the BOSS that you're ready to work before you go to sleep */
1103       ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1104     }
1105   }
1106   return NULL;
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "PetscThreadInitialize_Main"
1111 void* PetscThreadInitialize_Main(PetscInt N) {
1112   PetscInt i,ierr;
1113   int status;
1114 
1115   if(PetscUseThreadPool) {
1116     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1117     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1118     arrmutex = (char*)memalign(Val1,Val2);
1119     arrcond1 = (char*)memalign(Val1,Val2);
1120     arrcond2 = (char*)memalign(Val1,Val2);
1121     arrstart = (char*)memalign(Val1,Val2);
1122     arrready = (char*)memalign(Val1,Val2);
1123     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1124     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1125     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1126     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1127     /* initialize job structure */
1128     for(i=0; i<PetscMaxThreads; i++) {
1129       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1130       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1131       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1132       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1133     }
1134     for(i=0; i<PetscMaxThreads; i++) {
1135       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1136       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1137       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1138       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1139     }
1140     job_main.pfunc = NULL;
1141     job_main.pdata = (void**)malloc(N*sizeof(void*));
1142     pVal = (int*)malloc(N*sizeof(int));
1143     /* allocate memory in the heap for the thread structure */
1144     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1145     /* create threads */
1146     for(i=0; i<N; i++) {
1147       pVal[i] = i;
1148       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1149       /* error check */
1150     }
1151   }
1152   else {
1153   }
1154   return NULL;
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "PetscThreadFinalize_Main"
1159 PetscErrorCode PetscThreadFinalize_Main() {
1160   int i,ierr;
1161   void* jstatus;
1162 
1163   PetscFunctionBegin;
1164 
1165   if(PetscUseThreadPool) {
1166     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1167     /* join the threads */
1168     for(i=0; i<PetscMaxThreads; i++) {
1169       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1170     }
1171     free(PetscThreadPoint);
1172     free(arrmutex);
1173     free(arrcond1);
1174     free(arrcond2);
1175     free(arrstart);
1176     free(arrready);
1177     free(job_main.pdata);
1178     free(pVal);
1179   }
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MainWait_Main"
1185 void MainWait_Main() {
1186   int i,ierr;
1187   for(i=0; i<PetscMaxThreads; i++) {
1188     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1189     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1190       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1191     }
1192     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1193   }
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MainJob_Main"
1198 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1199   int i,ierr;
1200   PetscErrorCode ijoberr = 0;
1201   if(PetscUseThreadPool) {
1202     MainWait(); /* you know everyone is waiting to be signalled! */
1203     job_main.pfunc = pFunc;
1204     job_main.pdata = data;
1205     for(i=0; i<PetscMaxThreads; i++) {
1206       *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1207     }
1208     /* tell the threads to go to work */
1209     for(i=0; i<PetscMaxThreads; i++) {
1210       ierr = pthread_cond_signal(job_main.cond2array[i]);
1211     }
1212     if(pFunc!=FuncFinish) {
1213       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1214     }
1215   }
1216   else {
1217     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1218     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1219     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1220     free(apThread);
1221   }
1222   if(ithreaderr) {
1223     ijoberr = ithreaderr;
1224   }
1225   return ijoberr;
1226 }
1227 /****  ****/
1228 
1229 /**** Chain Thread Functions ****/
/*
   PetscThreadFunc_Chain - Entry routine executed by every worker of the 'chain' thread pool.

   Input Parameter:
.  arg - points to an int holding this worker's id; it is read once into ThreadId

   Workers form a chain: the single subordinate of worker t is worker t+1, and the
   worker with id PetscMaxThreads-1 has none.  Worker 0 is the only one that signals
   main_cond.  Each worker first waits until its subordinate reports ready, reports
   ready itself, and then loops: sleep on its own cond2 entry until woken, wake its
   subordinate, run job_chain.pfunc, wait for the subordinate to report ready again,
   and report ready itself.  The loop ends once PetscThreadGo is false.

   Always returns NULL; a nonzero result from the job function is recorded by setting
   the global ithreaderr to 1.  Return values of the pthread calls are stored in ierr
   but never examined.
*/
1230 void* PetscThreadFunc_Chain(void* arg) {
1231   PetscErrorCode iterr;
1232   int icorr,ierr;
1233   int* pId = (int*)arg;
1234   int ThreadId = *pId;
1235   int SubWorker = ThreadId + 1;
1236   PetscBool PeeOn;
1237   cpu_set_t mset;
1238   //printf("Thread %d In Chain Thread Function\n",ThreadId);
  /* restrict the calling thread to the single core listed for this worker */
1239   icorr = ThreadCoreAffinity[ThreadId];
1240   CPU_ZERO(&mset);
1241   CPU_SET(icorr,&mset);
1242   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1243 
  /* PeeOn is true for the last worker in the chain, which has no subordinate */
1244   if(ThreadId==(PetscMaxThreads-1)) {
1245     PeeOn = PETSC_TRUE;
1246   }
1247   else {
1248     PeeOn = PETSC_FALSE;
1249   }
1250   if(PeeOn==PETSC_FALSE) {
1251     /* check your subordinate, wait for him to be ready */
1252     ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1253     while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
1254       /* upon entry, atomically releases the lock and blocks
1255        upon return, has the lock */
1256       ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1257     }
1258     ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1259     /* your subordinate is now ready*/
1260   }
  /* this lock stays held until the pthread_cond_wait() inside the loop below releases it */
1261   ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1262   /* update your ready status */
1263   *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1264   if(ThreadId==0) {
1265     job_chain.eJobStat = JobCompleted;
1266     /* signal main */
1267     ierr = pthread_cond_signal(&main_cond);
1268   }
1269   else {
1270     /* tell your boss that you're ready to work */
1271     ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1272   }
1273   /*  the while loop needs to have an exit
1274      the 'main' thread can terminate all the threads by performing a broadcast
1275    and calling FuncFinish */
1276   while(PetscThreadGo) {
1277     /* need to check the condition to ensure we don't have to wait
1278        waiting when you don't have to causes problems
1279      also need to check the condition to ensure proper handling of spurious wakeups */
1280     while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
1281       /*upon entry, atomically releases the lock and blocks
1282        upon return, has the lock */
1283         ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
        /* NOTE(review): the flags are flipped after every return from the wait, so any
           wakeup (including a spurious one) ends this inner loop - confirm this is intended */
1284 	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
1285 	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
1286     }
1287     if(ThreadId==0) {
1288       job_chain.startJob = PETSC_FALSE;
1289       job_chain.eJobStat = ThreadsWorking;
1290     }
1291     ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
1292     if(PeeOn==PETSC_FALSE) {
1293       /* tell your subworker it's time to get to work */
1294       ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
1295     }
1296     /* do your job */
    /* with no data array the job function receives NULL, otherwise this worker's own entry */
1297     if(job_chain.pdata==NULL) {
1298       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
1299     }
1300     else {
1301       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
1302     }
1303     if(iterr!=0) {
1304       ithreaderr = 1;
1305     }
    /* PetscThreadGo is re-read here so that no re-arming is done once it has become false */
1306     if(PetscThreadGo) {
1307       /* reset job, get ready for more */
1308       if(PeeOn==PETSC_FALSE) {
1309         /* check your subordinate, wait for him to be ready
1310          how do you know for a fact that your subordinate has actually started? */
1311         ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1312         while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
1313           /* upon entry, atomically releases the lock and blocks
1314            upon return, has the lock */
1315           ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1316         }
1317         ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1318         /* your subordinate is now ready */
1319       }
      /* re-acquired here and deliberately left locked for the wait at the top of the loop */
1320       ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1321       *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1322       if(ThreadId==0) {
1323 	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
1324         /* root thread (foreman) signals 'main' */
1325         ierr = pthread_cond_signal(&main_cond);
1326       }
1327       else {
1328         /* signal your boss before you go to sleep */
1329         ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1330       }
1331     }
1332   }
1333   return NULL;
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "PetscThreadInitialize_Chain"
1338 void* PetscThreadInitialize_Chain(PetscInt N) {
1339   PetscInt i,ierr;
1340   int status;
1341 
1342   if(PetscUseThreadPool) {
1343     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1344     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1345     arrmutex = (char*)memalign(Val1,Val2);
1346     arrcond1 = (char*)memalign(Val1,Val2);
1347     arrcond2 = (char*)memalign(Val1,Val2);
1348     arrstart = (char*)memalign(Val1,Val2);
1349     arrready = (char*)memalign(Val1,Val2);
1350     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1351     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1352     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1353     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1354     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1355     /* initialize job structure */
1356     for(i=0; i<PetscMaxThreads; i++) {
1357       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1358       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1359       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1360       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1361       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1362     }
1363     for(i=0; i<PetscMaxThreads; i++) {
1364       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1365       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1366       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1367       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1368       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1369     }
1370     job_chain.pfunc = NULL;
1371     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1372     job_chain.startJob = PETSC_FALSE;
1373     job_chain.eJobStat = JobInitiated;
1374     pVal = (int*)malloc(N*sizeof(int));
1375     /* allocate memory in the heap for the thread structure */
1376     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1377     /* create threads */
1378     for(i=0; i<N; i++) {
1379       pVal[i] = i;
1380       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1381       /* should check error */
1382     }
1383   }
1384   else {
1385   }
1386   return NULL;
1387 }
1388 
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "PetscThreadFinalize_Chain"
1392 PetscErrorCode PetscThreadFinalize_Chain() {
1393   int i,ierr;
1394   void* jstatus;
1395 
1396   PetscFunctionBegin;
1397 
1398   if(PetscUseThreadPool) {
1399     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1400     /* join the threads */
1401     for(i=0; i<PetscMaxThreads; i++) {
1402       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1403       /* should check error */
1404     }
1405     free(PetscThreadPoint);
1406     free(arrmutex);
1407     free(arrcond1);
1408     free(arrcond2);
1409     free(arrstart);
1410     free(arrready);
1411     free(job_chain.pdata);
1412     free(pVal);
1413   }
1414   else {
1415   }
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MainWait_Chain"
1421 void MainWait_Chain() {
1422   int ierr;
1423   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1424   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1425     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1426   }
1427   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MainJob_Chain"
1432 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1433   int i,ierr;
1434   PetscErrorCode ijoberr = 0;
1435   if(PetscUseThreadPool) {
1436     MainWait();
1437     job_chain.pfunc = pFunc;
1438     job_chain.pdata = data;
1439     job_chain.startJob = PETSC_TRUE;
1440     for(i=0; i<PetscMaxThreads; i++) {
1441       *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1442     }
1443     job_chain.eJobStat = JobInitiated;
1444     ierr = pthread_cond_signal(job_chain.cond2array[0]);
1445     if(pFunc!=FuncFinish) {
1446       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1447     }
1448   }
1449   else {
1450     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1451     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1452     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1453     free(apThread);
1454   }
1455   if(ithreaderr) {
1456     ijoberr = ithreaderr;
1457   }
1458   return ijoberr;
1459 }
1460 /****  ****/
1461 
1462 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1463 /**** True Thread Functions ****/
/*
   PetscThreadFunc_True - Entry routine executed by every worker of the 'true' thread pool.

   Input Parameter:
.  arg - points to an int holding this worker's id; it is read once into ThreadId and
         used only to look up the core affinity

   All workers share job_true.mutex and job_true.cond.  A worker counts itself in
   job_true.iNumReadyThreads and signals main_cond when that count reaches
   PetscMaxThreads.  It then loops: wait on job_true.cond until a job is available,
   claim a slot by decrementing iNumJobThreads and iNumReadyThreads, run job_true.pfunc,
   meet the other workers at the barrier job_true.pbarr, and count itself ready again.
   The loop ends once PetscThreadGo is false.

   Always returns NULL; a nonzero result from the job function is recorded by setting
   the global ithreaderr to 1.
*/
1464 void* PetscThreadFunc_True(void* arg) {
1465   int icorr,ierr,iVal;
1466   int* pId = (int*)arg;
1467   int ThreadId = *pId;
1468   PetscErrorCode iterr;
1469   cpu_set_t mset;
1470   //printf("Thread %d In True Pool Thread Function\n",ThreadId);
  /* restrict the calling thread to the single core listed for this worker */
1471   icorr = ThreadCoreAffinity[ThreadId];
1472   CPU_ZERO(&mset);
1473   CPU_SET(icorr,&mset);
1474   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1475 
  /* this lock stays held until the pthread_cond_wait() inside the loop below releases it */
1476   ierr = pthread_mutex_lock(&job_true.mutex);
1477   job_true.iNumReadyThreads++;
1478   if(job_true.iNumReadyThreads==PetscMaxThreads) {
1479     ierr = pthread_cond_signal(&main_cond);
1480   }
1481   /*the while loop needs to have an exit
1482     the 'main' thread can terminate all the threads by performing a broadcast
1483    and calling FuncFinish */
1484   while(PetscThreadGo) {
1485     /*need to check the condition to ensure we don't have to wait
1486       waiting when you don't have to causes problems
1487      also need to wait if another thread sneaks in and messes with the predicate */
1488     while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
1489       /* upon entry, atomically releases the lock and blocks
1490        upon return, has the lock */
1491       ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
1492     }
1493     job_true.startJob = PETSC_FALSE;
1494     job_true.iNumJobThreads--;
1495     job_true.iNumReadyThreads--;
    /* index of this worker's argument in pdata, derived from the ready count while the mutex is held */
1496     iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
1497     pthread_mutex_unlock(&job_true.mutex);
    /* with no data array the job function receives NULL, otherwise entry iVal */
1498     if(job_true.pdata==NULL) {
1499       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
1500     }
1501     else {
1502       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
1503     }
1504     if(iterr!=0) {
1505       ithreaderr = 1;
1506     }
1507     /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
1508       what happens if a thread finishes before they all start? BAD!
1509      what happens if a thread finishes before any else start? BAD! */
1510     pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
1511     /* reset job */
1512     if(PetscThreadGo) {
      /* re-acquired here and deliberately left locked for the wait at the top of the loop */
1513       pthread_mutex_lock(&job_true.mutex);
1514       job_true.iNumReadyThreads++;
1515       if(job_true.iNumReadyThreads==PetscMaxThreads) {
1516 	/* signal the 'main' thread that the job is done! (only done once) */
1517 	ierr = pthread_cond_signal(&main_cond);
1518       }
1519     }
1520   }
1521   return NULL;
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "PetscThreadInitialize_True"
1526 void* PetscThreadInitialize_True(PetscInt N) {
1527   PetscInt i;
1528   int status;
1529 
1530   if(PetscUseThreadPool) {
1531     pVal = (int*)malloc(N*sizeof(int));
1532     /* allocate memory in the heap for the thread structure */
1533     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1534     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1535     job_true.pdata = (void**)malloc(N*sizeof(void*));
1536     for(i=0; i<N; i++) {
1537       pVal[i] = i;
1538       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1539       /* error check to ensure proper thread creation */
1540       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1541       /* should check error */
1542     }
1543   }
1544   else {
1545   }
1546   return NULL;
1547 }
1548 
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "PetscThreadFinalize_True"
1552 PetscErrorCode PetscThreadFinalize_True() {
1553   int i,ierr;
1554   void* jstatus;
1555 
1556   PetscFunctionBegin;
1557 
1558   if(PetscUseThreadPool) {
1559     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1560     /* join the threads */
1561     for(i=0; i<PetscMaxThreads; i++) {
1562       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1563       /* should check error */
1564     }
1565     free(BarrPoint);
1566     free(PetscThreadPoint);
1567   }
1568   else {
1569   }
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MainWait_True"
1575 void MainWait_True() {
1576   int ierr;
1577   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1578     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1579   }
1580   ierr = pthread_mutex_unlock(&job_true.mutex);
1581 }
1582 
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MainJob_True"
1585 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1586   int ierr;
1587   PetscErrorCode ijoberr = 0;
1588   if(PetscUseThreadPool) {
1589     MainWait();
1590     job_true.pfunc = pFunc;
1591     job_true.pdata = data;
1592     job_true.pbarr = &BarrPoint[n];
1593     job_true.iNumJobThreads = n;
1594     job_true.startJob = PETSC_TRUE;
1595     ierr = pthread_cond_broadcast(&job_true.cond);
1596     if(pFunc!=FuncFinish) {
1597       MainWait(); /* why wait after? guarantees that job gets done */
1598     }
1599   }
1600   else {
1601     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1602     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1603     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1604     free(apThread);
1605   }
1606   if(ithreaderr) {
1607     ijoberr = ithreaderr;
1608   }
1609   return ijoberr;
1610 }
1611 /**** NO THREAD POOL FUNCTION ****/
1612 #undef __FUNCT__
1613 #define __FUNCT__ "MainJob_Spawn"
1614 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1615   PetscErrorCode ijoberr = 0;
1616 
1617   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1618   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1619   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1620   free(apThread);
1621 
1622   return ijoberr;
1623 }
1624 /****  ****/
1625 #endif
1626 
1627 void* FuncFinish(void* arg) {
1628   PetscThreadGo = PETSC_FALSE;
1629   return(0);
1630 }
1631 
1632 #endif
1633