xref: /petsc/src/sys/objects/init.c (revision ba61063db8cd96e5a5f702c43a4ce6e281e3e2a6)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #define _GNU_SOURCE
10 #include <sched.h>
11 #include <petscsys.h>        /*I  "petscsys.h"   I*/
12 #if defined(PETSC_USE_PTHREAD)
13 #include <pthread.h>
14 #endif
15 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
16 #include <sys/sysinfo.h>
17 #endif
18 #include <unistd.h>
19 #if defined(PETSC_HAVE_STDLIB_H)
20 #include <stdlib.h>
21 #endif
22 #if defined(PETSC_HAVE_MALLOC_H)
23 #include <malloc.h>
24 #endif
25 #if defined(PETSC_HAVE_VALGRIND)
26 #include <valgrind/valgrind.h>
27 #endif
28 
29 /* ------------------------Nasty global variables -------------------------------*/
30 /*
31      Indicates if PETSc started up MPI, or it was
32    already started before PETSc was initialized.
33 */
34 PetscBool    PetscBeganMPI         = PETSC_FALSE;
35 PetscBool    PetscInitializeCalled = PETSC_FALSE;
36 PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
37 PetscBool    PetscUseThreadPool    = PETSC_FALSE;
38 PetscBool    PetscThreadGo         = PETSC_TRUE;
39 PetscMPIInt  PetscGlobalRank = -1;
40 PetscMPIInt  PetscGlobalSize = -1;
41 
#if defined(PETSC_USE_PTHREAD_CLASSES)
/* Default number of threads; may be overridden with -thread_max */
PetscMPIInt        PetscMaxThreads = 2;
pthread_t          *PetscThreadPoint;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t  *BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode     ithreaderr = 0;
int                *pVal;

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
/* Core number assigned to each thread, indexed by thread id */
int                *ThreadCoreAffinity;

/* Job progress states; used by 'chain','tree' thread pool */
typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;

/* Shared job state; presumably belongs to the 'tree' thread pool */
typedef struct {
  pthread_mutex_t **mutexarray;
  pthread_cond_t  **cond1array;
  pthread_cond_t  **cond2array;
  void            *(*pfunc)(void*);
  void            **pdata;
  PetscBool       startJob;
  estat           eJobStat;
  PetscBool       **arrThreadStarted;
  PetscBool       **arrThreadReady;
} sjob_tree;
sjob_tree job_tree;

/* Shared job state; presumably belongs to the 'main' thread pool */
typedef struct {
  pthread_mutex_t **mutexarray;
  pthread_cond_t  **cond1array;
  pthread_cond_t  **cond2array;
  void            *(*pfunc)(void*);
  void            **pdata;
  PetscBool       **arrThreadReady;
} sjob_main;
sjob_main job_main;

/* Shared job state; presumably belongs to the 'chain' thread pool */
typedef struct {
  pthread_mutex_t **mutexarray;
  pthread_cond_t  **cond1array;
  pthread_cond_t  **cond2array;
  void            *(*pfunc)(void*);
  void            **pdata;
  PetscBool       startJob;
  estat           eJobStat;
  PetscBool       **arrThreadStarted;
  PetscBool       **arrThreadReady;
} sjob_chain;
sjob_chain job_chain;

#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/* Shared job state; presumably belongs to the 'true' thread pool (needs pthread barriers) */
typedef struct {
  pthread_mutex_t   mutex;
  pthread_cond_t    cond;
  void              *(*pfunc)(void*);
  void              **pdata;
  pthread_barrier_t *pbarr;
  int               iNumJobThreads;
  int               iNumReadyThreads;
  PetscBool         startJob;
} sjob_true;
/* Initializer entries follow the member order of sjob_true above */
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
char *arrmutex; /* used by 'chain','main','tree' thread pools */
char *arrcond1; /* used by 'chain','main','tree' thread pools */
char *arrcond2; /* used by 'chain','main','tree' thread pools */
char *arrstart; /* used by 'chain','main','tree' thread pools */
char *arrready; /* used by 'chain','main','tree' thread pools */

/* Function Pointers: bound to one pool implementation when -use_thread_pool is given */
void           *(*PetscThreadFunc)(void*) = NULL;
void           *(*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void           *PetscThreadFunc_Tree(void*);
void           *PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void           *PetscThreadFunc_Main(void*);
void           *PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void           *PetscThreadFunc_Chain(void*);
void           *PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void           *PetscThreadFunc_True(void*);
void           *PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/

void *FuncFinish(void*);
void *PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void *PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif
146 
147 #if defined(PETSC_USE_COMPLEX)
148 #if defined(PETSC_COMPLEX_INSTANTIATE)
149 template <> class std::complex<double>; /* instantiate complex template class */
150 #endif
151 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
152 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
153 MPI_Datatype   MPI_C_COMPLEX;
154 #endif
155 PetscScalar    PETSC_i;
156 #else
157 PetscScalar    PETSC_i = 0.0;
158 #endif
159 #if defined(PETSC_USE_REAL___FLOAT128)
160 MPI_Datatype   MPIU___FLOAT128 = 0;
161 #endif
162 MPI_Datatype   MPIU_2SCALAR = 0;
163 MPI_Datatype   MPIU_2INT = 0;
164 
165 /*
166      These are needed by petscbt.h
167 */
168 #include <petscbt.h>
169 char      _BT_mask = ' ';
170 char      _BT_c = ' ';
171 PetscInt  _BT_idx  = 0;
172 
173 /*
174        Function that is called to display all error messages
175 */
176 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
177 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
178 #if defined(PETSC_HAVE_MATLAB_ENGINE)
179 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
180 #else
181 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
182 #endif
183 /*
184   This is needed to turn on/off cusp synchronization */
185 PetscBool   synchronizeCUSP = PETSC_FALSE;
186 
187 /* ------------------------------------------------------------------------------*/
188 /*
189    Optional file where all PETSc output from various prints is saved
190 */
191 FILE *petsc_history = PETSC_NULL;
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "PetscOpenHistoryFile"
195 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
196 {
197   PetscErrorCode ierr;
198   PetscMPIInt    rank,size;
199   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
200   char           version[256];
201 
202   PetscFunctionBegin;
203   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
204   if (!rank) {
205     char        arch[10];
206     int         err;
207     PetscViewer viewer;
208 
209     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
210     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
211     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
212     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
213     if (filename) {
214       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
215     } else {
216       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
217       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
218       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
219     }
220 
221     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
222     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
223     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
224     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
225     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
226     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
227     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
228     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
229     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
230     err = fflush(*fd);
231     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
232   }
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "PetscCloseHistoryFile"
238 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
239 {
240   PetscErrorCode ierr;
241   PetscMPIInt    rank;
242   char           date[64];
243   int            err;
244 
245   PetscFunctionBegin;
246   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
247   if (!rank) {
248     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
249     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
250     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
251     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
252     err = fflush(*fd);
253     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
254     err = fclose(*fd);
255     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 /* ------------------------------------------------------------------------------*/
261 
262 /*
263    This is ugly and probably belongs somewhere else, but I want to
264   be able to put a true MPI abort error handler with command line args.
265 
266     This is so MPI errors in the debugger will leave all the stack
267   frames. The default MPI_Abort() cleans up and exits, thus providing no useful information
268   in the debugger hence we call abort() instead of MPI_Abort().
269 */
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "Petsc_MPI_AbortOnError"
273 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
274 {
275   PetscFunctionBegin;
276   (*PetscErrorPrintf)("MPI error %d\n",*flag);
277   abort();
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
282 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
283 {
284   PetscErrorCode ierr;
285 
286   PetscFunctionBegin;
287   (*PetscErrorPrintf)("MPI error %d\n",*flag);
288   ierr = PetscAttachDebugger();
289   if (ierr) { /* hopeless so get out */
290     MPI_Abort(*comm,*flag);
291   }
292 }
293 
294 #undef __FUNCT__
295 #define __FUNCT__ "PetscEnd"
296 /*@C
297    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
298      wishes a clean exit somewhere deep in the program.
299 
300    Collective on PETSC_COMM_WORLD
301 
302    Options Database Keys are the same as for PetscFinalize()
303 
304    Level: advanced
305 
306    Note:
307    See PetscInitialize() for more general runtime options.
308 
309 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
310 @*/
311 PetscErrorCode  PetscEnd(void)
312 {
313   PetscFunctionBegin;
314   PetscFinalize();
315   exit(0);
316   return 0;
317 }
318 
319 PetscBool    PetscOptionsPublish = PETSC_FALSE;
320 extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
321 extern PetscBool  petscsetmallocvisited;
322 static char       emacsmachinename[256];
323 
324 PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
325 PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "PetscSetHelpVersionFunctions"
329 /*@C
330    PetscSetHelpVersionFunctions - Sets functions that print help and version information
331    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
332    This routine enables a "higher-level" package that uses PETSc to print its messages first.
333 
334    Input Parameter:
335 +  help - the help function (may be PETSC_NULL)
336 -  version - the version function (may be PETSC_NULL)
337 
338    Level: developer
339 
340    Concepts: package help message
341 
342 @*/
343 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
344 {
345   PetscFunctionBegin;
346   PetscExternalHelpFunction    = help;
347   PetscExternalVersionFunction = version;
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
353 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
354 {
355   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
356   MPI_Comm       comm = PETSC_COMM_WORLD;
357   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
358   PetscErrorCode ierr;
359   PetscReal      si;
360   int            i;
361   PetscMPIInt    rank;
362   char           version[256];
363 
364   PetscFunctionBegin;
365   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
366 
367   /*
368       Setup the memory management; support for tracing malloc() usage
369   */
370   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
371 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
372   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
373   if ((!flg2 || flg1) && !petscsetmallocvisited) {
374 #if defined(PETSC_HAVE_VALGRIND)
375     if (flg2 || !(RUNNING_ON_VALGRIND)) {
376       /* turn off default -malloc if valgrind is being used */
377 #endif
378       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
379 #if defined(PETSC_HAVE_VALGRIND)
380     }
381 #endif
382   }
383 #else
384   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
385   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
386   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
387 #endif
388   if (flg3) {
389     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
390   }
391   flg1 = PETSC_FALSE;
392   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
393   if (flg1) {
394     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
395     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
396   }
397 
398   flg1 = PETSC_FALSE;
399   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
400   if (!flg1) {
401     flg1 = PETSC_FALSE;
402     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
403   }
404   if (flg1) {
405     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
406   }
407 
408   /*
409       Set the display variable for graphics
410   */
411   ierr = PetscSetDisplay();CHKERRQ(ierr);
412 
413   /*
414       Determine whether user specified maximum number of threads
415    */
416   ierr = PetscOptionsHasName(PETSC_NULL,"-thread_max",&flg1);CHKERRQ(ierr);
417   if(flg1) {
418     ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
419   }
420 
421   /*
422       Determine whether to use thread pool
423    */
424   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
425   if(flg1) {
426     PetscUseThreadPool = PETSC_TRUE;
427     PetscInt N_CORES = get_nprocs();
428     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
429     char tstr[9];
430     char tbuf[2];
431     strcpy(tstr,"-thread");
432     for(i=0;i<PetscMaxThreads;i++) {
433       ThreadCoreAffinity[i] = i;
434       sprintf(tbuf,"%d",i);
435       strcat(tstr,tbuf);
436       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
437       if(flg1) {
438         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
439         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
440       }
441       tstr[7] = '\0';
442     }
443     /* get the thread pool type */
444     PetscInt ipool = 0;
445     ierr = PetscOptionsGetInt(PETSC_NULL,"-pool",&ipool,PETSC_NULL);CHKERRQ(ierr);
446     switch(ipool) {
447     case 1:
448       PetscThreadFunc       = &PetscThreadFunc_Tree;
449       PetscThreadInitialize = &PetscThreadInitialize_Tree;
450       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
451       MainWait              = &MainWait_Tree;
452       MainJob               = &MainJob_Tree;
453       break;
454     case 2:
455       PetscThreadFunc       = &PetscThreadFunc_Main;
456       PetscThreadInitialize = &PetscThreadInitialize_Main;
457       PetscThreadFinalize   = &PetscThreadFinalize_Main;
458       MainWait              = &MainWait_Main;
459       MainJob               = &MainJob_Main;
460       break;
461     case 3:
462       PetscThreadFunc       = &PetscThreadFunc_Chain;
463       PetscThreadInitialize = &PetscThreadInitialize_Chain;
464       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
465       MainWait              = &MainWait_Chain;
466       MainJob               = &MainJob_Chain;
467       break;
468     default:
469       PetscThreadFunc       = &PetscThreadFunc_True;
470       PetscThreadInitialize = &PetscThreadInitialize_True;
471       PetscThreadFinalize   = &PetscThreadFinalize_True;
472       MainWait              = &MainWait_True;
473       MainJob               = &MainJob_True;
474       break;
475     }
476   }
477   PetscThreadInitialize(PetscMaxThreads);
478   /*
479       Print the PETSc version information
480   */
481   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
482   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
483   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
484   if (flg1 || flg2 || flg3){
485 
486     /*
487        Print "higher-level" package version message
488     */
489     if (PetscExternalVersionFunction) {
490       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
491     }
492 
493     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
494     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
495 ------------------------------\n");CHKERRQ(ierr);
496     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
497     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
498     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
499     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
500     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
501     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
502     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
503 ------------------------------\n");CHKERRQ(ierr);
504   }
505 
506   /*
507        Print "higher-level" package help message
508   */
509   if (flg3){
510     if (PetscExternalHelpFunction) {
511       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
512     }
513   }
514 
515   /*
516       Setup the error handling
517   */
518   flg1 = PETSC_FALSE;
519   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
520   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
521   flg1 = PETSC_FALSE;
522   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
523   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
524   flg1 = PETSC_FALSE;
525   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
526   if (flg1) {
527     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
528   }
529   flg1 = PETSC_FALSE;
530   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
531   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
532   flg1 = PETSC_FALSE;
533   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
534   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
535 
536   /*
537       Setup debugger information
538   */
539   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
540   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
541   if (flg1) {
542     MPI_Errhandler err_handler;
543 
544     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
545     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
546     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
547     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
548   }
549   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
550   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
551   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
552   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
553   if (flg1 || flg2) {
554     PetscMPIInt    size;
555     PetscInt       lsize,*nodes;
556     MPI_Errhandler err_handler;
557     /*
558        we have to make sure that all processors have opened
559        connections to all other processors, otherwise once the
560        debugger has stated it is likely to receive a SIGUSR1
561        and kill the program.
562     */
563     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
564     if (size > 2) {
565       PetscMPIInt dummy = 0;
566       MPI_Status  status;
567       for (i=0; i<size; i++) {
568         if (rank != i) {
569           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
570         }
571       }
572       for (i=0; i<size; i++) {
573         if (rank != i) {
574           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
575         }
576       }
577     }
578     /* check if this processor node should be in debugger */
579     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
580     lsize = size;
581     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
582     if (flag) {
583       for (i=0; i<lsize; i++) {
584         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
585       }
586     }
587     if (!flag) {
588       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
589       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
590       if (flg1) {
591         ierr = PetscAttachDebugger();CHKERRQ(ierr);
592       } else {
593         ierr = PetscStopForDebugger();CHKERRQ(ierr);
594       }
595       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
596       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
597     }
598     ierr = PetscFree(nodes);CHKERRQ(ierr);
599   }
600 
601   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
602   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
603 
604 #if defined(PETSC_USE_SOCKET_VIEWER)
605   /*
606     Activates new sockets for zope if needed
607   */
608   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
609   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
610   if (flgz){
611     int  sockfd;
612     char hostname[256];
613     char username[256];
614     int  remoteport = 9999;
615 
616     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
617     if (!hostname[0]){
618       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
619     }
620     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
621     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
622     PETSC_ZOPEFD = fdopen(sockfd, "w");
623     if (flgzout){
624       PETSC_STDOUT = PETSC_ZOPEFD;
625       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
626       fprintf(PETSC_STDOUT, "<<<start>>>");
627     } else {
628       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
629       fprintf(PETSC_ZOPEFD, "<<<start>>>");
630     }
631   }
632 #endif
633 #if defined(PETSC_USE_SERVER)
634   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
635   if (flgz){
636     PetscInt port = PETSC_DECIDE;
637     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
638     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
639   }
640 #endif
641 
642   /*
643         Setup profiling and logging
644   */
645 #if defined (PETSC_USE_INFO)
646   {
647     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
648     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
649     if (flg1 && logname[0]) {
650       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
651     } else if (flg1) {
652       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
653     }
654   }
655 #endif
656 #if defined(PETSC_USE_LOG)
657   mname[0] = 0;
658   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
659   if (flg1) {
660     if (mname[0]) {
661       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
662     } else {
663       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
664     }
665   }
666 #if defined(PETSC_HAVE_MPE)
667   flg1 = PETSC_FALSE;
668   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
669   if (flg1) PetscLogMPEBegin();
670 #endif
671   flg1 = PETSC_FALSE;
672   flg2 = PETSC_FALSE;
673   flg3 = PETSC_FALSE;
674   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
675   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
676   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
677   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
678   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
679   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
680 
681   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
682   if (flg1) {
683     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
684     FILE *file;
685     if (mname[0]) {
686       sprintf(name,"%s.%d",mname,rank);
687       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
688       file = fopen(fname,"w");
689       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
690     } else {
691       file = PETSC_STDOUT;
692     }
693     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
694   }
695 #endif
696 
697   /*
698       Setup building of stack frames for all function calls
699   */
700 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
701   ierr = PetscStackCreate();CHKERRQ(ierr);
702 #endif
703 
704   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
705 
706   /*
707        Print basic help message
708   */
709   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
710   if (flg1) {
711     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
712     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
713     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
714     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
715     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
716     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
717     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
718     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
719     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
720     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
721     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
722     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
723     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
724     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
725     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
726     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
727     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
728     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
729     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
730     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
731     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
732     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
733     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
734     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
735     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
736     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
737     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
738     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
739     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
741     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
742     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
744 #if defined(PETSC_USE_LOG)
745     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
748 #if defined(PETSC_HAVE_MPE)
749     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
750 #endif
751     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
752 #endif
753     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
757   }
758 
759   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
760   if (flg1) {
761     ierr = PetscSleep(si);CHKERRQ(ierr);
762   }
763 
764   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
765   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
766   if (f) {
767     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
768   }
769 
770 #if defined(PETSC_HAVE_CUSP)
771   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
772   if (flg3) flg1 = PETSC_TRUE;
773   else flg1 = PETSC_FALSE;
774   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
775   if (flg1) synchronizeCUSP = PETSC_TRUE;
776 #endif
777 
778   PetscFunctionReturn(0);
779 }
780 
781 #if defined(PETSC_USE_PTHREAD_CLASSES)
782 
783 /**** 'Tree' Thread Pool Functions ****/
/*
   PetscThreadFunc_Tree - Entry routine of one worker thread of the 'tree' thread pool.

   Input Parameter:
.  arg - points to an int holding this thread's id (ThreadId); read once on entry

   Thread ThreadId treats the threads Mary*ThreadId+1 ... Mary*ThreadId+Mary whose
   ids are below PetscMaxThreads as its subordinates (Mary is fixed to 2 here).
   After requesting the CPU affinity stored in ThreadCoreAffinity[ThreadId] it
   waits for its subordinates to flag themselves ready, flags itself ready, and
   then loops while PetscThreadGo is set:
     - sleep on cond2array[ThreadId] until woken,
     - wake the subordinates,
     - run job_tree.pfunc,
     - wait for the subordinates to be ready again and report ready itself.
   Thread 0 reports to 'main' through main_cond; every other thread reports
   through its own cond1array entry.

   Always returns NULL. A nonzero value returned by job_tree.pfunc is recorded by
   setting the global ithreaderr to 1.
*/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;
  cpu_set_t mset;

  /* request that the calling thread runs on the core listed for it; the
     return value of sched_setaffinity() is not checked */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* PeeOn is PETSC_TRUE when even the first subordinate id falls outside the
     pool, i.e. this thread has no subordinates */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks
           upon return, has the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  /* this lock is kept until the pthread_cond_wait() in the loop below releases it */
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit
  the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    /* NOTE(review): the flags are flipped right after every return from
       pthread_cond_wait(), so the loop body runs at most once and a spurious
       wakeup would look like a real job start - confirm this is acceptable */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      /* the root thread records that the job has been picked up */
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
	SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job: with no data array the kernel receives NULL, otherwise it
       receives this thread's entry of the array */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;
    }
    /* skipped once the job that just ran (FuncFinish) cleared PetscThreadGo;
       the thread then leaves the loop without holding its mutex */
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinates, waiting for them to be ready;
         arrThreadStarted is tested as well so that a ready flag left over from
         before the subordinate picked up this job is not mistaken for completion */
	for(i=1;i<=Mary;i++) {
	  SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks
               upon return, has the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
	}
        /* your subordinates are now ready */
      }
      /* re-acquire your own lock; it is released again by the wait at the top of the loop */
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "PetscThreadInitialize_Tree"
906 void* PetscThreadInitialize_Tree(PetscInt N) {
907   PetscInt i,ierr;
908   int status;
909 
910   if(PetscUseThreadPool) {
911     size_t Val1 = (size_t)CACHE_LINE_SIZE;
912     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
913     arrmutex = (char*)memalign(Val1,Val2);
914     arrcond1 = (char*)memalign(Val1,Val2);
915     arrcond2 = (char*)memalign(Val1,Val2);
916     arrstart = (char*)memalign(Val1,Val2);
917     arrready = (char*)memalign(Val1,Val2);
918     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
919     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
920     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
921     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
922     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
923     /* initialize job structure */
924     for(i=0; i<PetscMaxThreads; i++) {
925       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
926       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
927       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
928       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
929       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
930     }
931     for(i=0; i<PetscMaxThreads; i++) {
932       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
933       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
934       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
935       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
936       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
937     }
938     job_tree.pfunc = NULL;
939     job_tree.pdata = (void**)malloc(N*sizeof(void*));
940     job_tree.startJob = PETSC_FALSE;
941     job_tree.eJobStat = JobInitiated;
942     pVal = (int*)malloc(N*sizeof(int));
943     /* allocate memory in the heap for the thread structure */
944     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
945     /* create threads */
946     for(i=0; i<N; i++) {
947       pVal[i] = i;
948       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
949       /* should check status */
950     }
951   }
952   return NULL;
953 }
954 
955 #undef __FUNCT__
956 #define __FUNCT__ "PetscThreadFinalize_Tree"
957 PetscErrorCode PetscThreadFinalize_Tree() {
958   int i,ierr;
959   void* jstatus;
960 
961   PetscFunctionBegin;
962 
963   if(PetscUseThreadPool) {
964     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
965     /* join the threads */
966     for(i=0; i<PetscMaxThreads; i++) {
967       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
968       /* do error checking*/
969     }
970     free(PetscThreadPoint);
971     free(arrmutex);
972     free(arrcond1);
973     free(arrcond2);
974     free(arrstart);
975     free(arrready);
976     free(job_tree.pdata);
977     free(pVal);
978   }
979   else {
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MainWait_Tree"
986 void MainWait_Tree() {
987   int ierr;
988   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
989   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
990     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
991   }
992   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
993 }
994 
995 #undef __FUNCT__
996 #define __FUNCT__ "MainJob_Tree"
997 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
998   int i,ierr;
999   PetscErrorCode ijoberr = 0;
1000   if(PetscUseThreadPool) {
1001     MainWait();
1002     job_tree.pfunc = pFunc;
1003     job_tree.pdata = data;
1004     job_tree.startJob = PETSC_TRUE;
1005     for(i=0; i<PetscMaxThreads; i++) {
1006       *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1007     }
1008     job_tree.eJobStat = JobInitiated;
1009     ierr = pthread_cond_signal(job_tree.cond2array[0]);
1010     if(pFunc!=FuncFinish) {
1011       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1012     }
1013   }
1014   else {
1015     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1016     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1017     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1018     free(apThread);
1019   }
1020   if(ithreaderr) {
1021     ijoberr = ithreaderr;
1022   }
1023   return ijoberr;
1024 }
1025 /****  ****/
1026 
1027 /**** 'Main' Thread Pool Functions ****/
/*
   PetscThreadFunc_Main - Entry routine of one worker thread of the 'main' thread pool.

   Input Parameter:
.  arg - points to an int holding this thread's id (ThreadId); read once on entry

   After requesting the CPU affinity stored in ThreadCoreAffinity[ThreadId] the
   thread flags itself ready and signals its cond1array entry. While
   PetscThreadGo is set it then sleeps on cond2array[ThreadId] until its ready
   flag has been cleared, runs job_main.pfunc, and flags itself ready again.

   Always returns NULL. A nonzero value returned by job_main.pfunc is recorded by
   setting the global ithreaderr to 1.
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;

  /* request that the calling thread runs on the core listed for it; the
     return value of sched_setaffinity() is not checked */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* this lock is kept until the pthread_cond_wait() in the loop below releases it */
  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* the while loop needs to have an exit
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
	/* the ready flag is not cleared here; MainJob_Main() clears it before signalling */
	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* do your job: with no data array the kernel receives NULL, otherwise it
       receives this thread's entry of the array */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;
    }
    /* skipped once the job that just ran (FuncFinish) cleared PetscThreadGo;
       the thread then leaves the loop without holding its mutex */
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "PetscThreadInitialize_Main"
1082 void* PetscThreadInitialize_Main(PetscInt N) {
1083   PetscInt i,ierr;
1084   int status;
1085 
1086   if(PetscUseThreadPool) {
1087     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1088     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1089     arrmutex = (char*)memalign(Val1,Val2);
1090     arrcond1 = (char*)memalign(Val1,Val2);
1091     arrcond2 = (char*)memalign(Val1,Val2);
1092     arrstart = (char*)memalign(Val1,Val2);
1093     arrready = (char*)memalign(Val1,Val2);
1094     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1095     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1096     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1097     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1098     /* initialize job structure */
1099     for(i=0; i<PetscMaxThreads; i++) {
1100       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1101       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1102       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1103       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1104     }
1105     for(i=0; i<PetscMaxThreads; i++) {
1106       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1107       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1108       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1109       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1110     }
1111     job_main.pfunc = NULL;
1112     job_main.pdata = (void**)malloc(N*sizeof(void*));
1113     pVal = (int*)malloc(N*sizeof(int));
1114     /* allocate memory in the heap for the thread structure */
1115     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1116     /* create threads */
1117     for(i=0; i<N; i++) {
1118       pVal[i] = i;
1119       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1120       /* error check */
1121     }
1122   }
1123   else {
1124   }
1125   return NULL;
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "PetscThreadFinalize_Main"
1130 PetscErrorCode PetscThreadFinalize_Main() {
1131   int i,ierr;
1132   void* jstatus;
1133 
1134   PetscFunctionBegin;
1135 
1136   if(PetscUseThreadPool) {
1137     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1138     /* join the threads */
1139     for(i=0; i<PetscMaxThreads; i++) {
1140       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1141     }
1142     free(PetscThreadPoint);
1143     free(arrmutex);
1144     free(arrcond1);
1145     free(arrcond2);
1146     free(arrstart);
1147     free(arrready);
1148     free(job_main.pdata);
1149     free(pVal);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MainWait_Main"
1156 void MainWait_Main() {
1157   int i,ierr;
1158   for(i=0; i<PetscMaxThreads; i++) {
1159     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1160     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1161       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1162     }
1163     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1164   }
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MainJob_Main"
1169 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1170   int i,ierr;
1171   PetscErrorCode ijoberr = 0;
1172   if(PetscUseThreadPool) {
1173     MainWait(); /* you know everyone is waiting to be signalled! */
1174     job_main.pfunc = pFunc;
1175     job_main.pdata = data;
1176     for(i=0; i<PetscMaxThreads; i++) {
1177       *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1178     }
1179     /* tell the threads to go to work */
1180     for(i=0; i<PetscMaxThreads; i++) {
1181       ierr = pthread_cond_signal(job_main.cond2array[i]);
1182     }
1183     if(pFunc!=FuncFinish) {
1184       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1185     }
1186   }
1187   else {
1188     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1189     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1190     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1191     free(apThread);
1192   }
1193   if(ithreaderr) {
1194     ijoberr = ithreaderr;
1195   }
1196   return ijoberr;
1197 }
1198 /****  ****/
1199 
1200 /**** Chain Thread Functions ****/
/*
   PetscThreadFunc_Chain - Entry routine of one worker thread of the 'chain' thread pool.

   Input Parameter:
.  arg - points to an int holding this thread's id (ThreadId); read once on entry

   Thread ThreadId has thread ThreadId+1 as its single subordinate; the thread
   with id PetscMaxThreads-1 has none. After requesting the CPU affinity stored
   in ThreadCoreAffinity[ThreadId] it waits for its subordinate to flag itself
   ready, flags itself ready, and then loops while PetscThreadGo is set:
     - sleep on cond2array[ThreadId] until woken,
     - wake the subordinate,
     - run job_chain.pfunc,
     - wait for the subordinate to be ready again and report ready itself.
   Thread 0 reports to 'main' through main_cond; every other thread reports
   through its own cond1array entry.

   Always returns NULL. A nonzero value returned by job_chain.pfunc is recorded
   by setting the global ithreaderr to 1.
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;
  cpu_set_t mset;

  /* request that the calling thread runs on the core listed for it; the
     return value of sched_setaffinity() is not checked */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* PeeOn is PETSC_TRUE for the last thread of the chain, which has no subordinate */
  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready*/
  }
  /* this lock is kept until the pthread_cond_wait() in the loop below releases it */
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /*  the while loop needs to have an exit
     the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    /* NOTE(review): the flags are flipped right after every return from
       pthread_cond_wait(), so the loop body runs at most once and a spurious
       wakeup would look like a real job start - confirm this is acceptable */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      /* the first thread of the chain records that the job has been picked up */
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job: with no data array the kernel receives NULL, otherwise it
       receives this thread's entry of the array */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1;
    }
    /* skipped once the job that just ran (FuncFinish) cleared PetscThreadGo;
       the thread then leaves the loop without holding its mutex */
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinate, wait for him to be ready;
         arrThreadStarted is tested as well so that a ready flag left over from
         before the subordinate picked up this job is not mistaken for completion */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks
           upon return, has the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      /* re-acquire your own lock; it is released again by the wait at the top of the loop */
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "PetscThreadInitialize_Chain"
1309 void* PetscThreadInitialize_Chain(PetscInt N) {
1310   PetscInt i,ierr;
1311   int status;
1312 
1313   if(PetscUseThreadPool) {
1314     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1315     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1316     arrmutex = (char*)memalign(Val1,Val2);
1317     arrcond1 = (char*)memalign(Val1,Val2);
1318     arrcond2 = (char*)memalign(Val1,Val2);
1319     arrstart = (char*)memalign(Val1,Val2);
1320     arrready = (char*)memalign(Val1,Val2);
1321     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1322     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1323     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1324     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1325     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1326     /* initialize job structure */
1327     for(i=0; i<PetscMaxThreads; i++) {
1328       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1329       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1330       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1331       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1332       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1333     }
1334     for(i=0; i<PetscMaxThreads; i++) {
1335       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1336       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1337       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1338       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1339       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1340     }
1341     job_chain.pfunc = NULL;
1342     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1343     job_chain.startJob = PETSC_FALSE;
1344     job_chain.eJobStat = JobInitiated;
1345     pVal = (int*)malloc(N*sizeof(int));
1346     /* allocate memory in the heap for the thread structure */
1347     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1348     /* create threads */
1349     for(i=0; i<N; i++) {
1350       pVal[i] = i;
1351       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1352       /* should check error */
1353     }
1354   }
1355   else {
1356   }
1357   return NULL;
1358 }
1359 
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "PetscThreadFinalize_Chain"
1363 PetscErrorCode PetscThreadFinalize_Chain() {
1364   int i,ierr;
1365   void* jstatus;
1366 
1367   PetscFunctionBegin;
1368 
1369   if(PetscUseThreadPool) {
1370     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1371     /* join the threads */
1372     for(i=0; i<PetscMaxThreads; i++) {
1373       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1374       /* should check error */
1375     }
1376     free(PetscThreadPoint);
1377     free(arrmutex);
1378     free(arrcond1);
1379     free(arrcond2);
1380     free(arrstart);
1381     free(arrready);
1382     free(job_chain.pdata);
1383     free(pVal);
1384   }
1385   else {
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MainWait_Chain"
1392 void MainWait_Chain() {
1393   int ierr;
1394   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1395   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1396     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1397   }
1398   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MainJob_Chain"
1403 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1404   int i,ierr;
1405   PetscErrorCode ijoberr = 0;
1406   if(PetscUseThreadPool) {
1407     MainWait();
1408     job_chain.pfunc = pFunc;
1409     job_chain.pdata = data;
1410     job_chain.startJob = PETSC_TRUE;
1411     for(i=0; i<PetscMaxThreads; i++) {
1412       *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1413     }
1414     job_chain.eJobStat = JobInitiated;
1415     ierr = pthread_cond_signal(job_chain.cond2array[0]);
1416     if(pFunc!=FuncFinish) {
1417       MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1418     }
1419   }
1420   else {
1421     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1422     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1423     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1424     free(apThread);
1425   }
1426   if(ithreaderr) {
1427     ijoberr = ithreaderr;
1428   }
1429   return ijoberr;
1430 }
1431 /****  ****/
1432 
1433 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1434 /**** True Thread Functions ****/
/*
   PetscThreadFunc_True - Entry routine of one worker thread of the 'true' thread pool.

   Input Parameter:
.  arg - points to an int holding this thread's id (ThreadId); read once on entry

   All workers share job_true.mutex and job_true.cond. After requesting the CPU
   affinity stored in ThreadCoreAffinity[ThreadId] the thread counts itself in
   job_true.iNumReadyThreads (signalling main_cond when the count reaches
   PetscMaxThreads) and then loops while PetscThreadGo is set: wait for a job,
   claim one slot of it, run job_true.pfunc, meet the other participants at the
   barrier job_true.pbarr, and count itself ready again.

   Always returns NULL. A nonzero value returned by job_true.pfunc is recorded by
   setting the global ithreaderr to 1.
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;

  /* request that the calling thread runs on the core listed for it; the
     return value of sched_setaffinity() is not checked */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* this lock is kept until the pthread_cond_wait() in the loop below releases it */
  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    /* the last thread to become ready wakes 'main' */
    ierr = pthread_cond_signal(&main_cond);
  }
  /* the while loop needs to have an exit
    the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to wait if another thread sneaks in and messes with the predicate */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    /* claim one slot of the job while still holding the lock */
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* iVal is the index into pdata for this slot; presumably 0 for the first
       thread to claim a slot, 1 for the second, ... (this relies on
       iNumReadyThreads having been PetscMaxThreads when the job was posted) */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* do your job: with no data array the kernel receives NULL, otherwise it
       receives entry iVal of the array */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1;
    }
    /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
      what happens if a thread finishes before they all start? BAD!
     what happens if a thread finishes before any else start? BAD! */
    pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
    /* reset job; skipped once the job that just ran (FuncFinish) cleared
       PetscThreadGo, so the thread then exits without holding the mutex */
    if(PetscThreadGo) {
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
	/* signal the 'main' thread that the job is done! (only done once) */
	ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "PetscThreadInitialize_True"
1497 void* PetscThreadInitialize_True(PetscInt N) {
1498   PetscInt i;
1499   int status;
1500 
1501   if(PetscUseThreadPool) {
1502     pVal = (int*)malloc(N*sizeof(int));
1503     /* allocate memory in the heap for the thread structure */
1504     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1505     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1506     job_true.pdata = (void**)malloc(N*sizeof(void*));
1507     for(i=0; i<N; i++) {
1508       pVal[i] = i;
1509       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1510       /* error check to ensure proper thread creation */
1511       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1512       /* should check error */
1513     }
1514   }
1515   else {
1516   }
1517   return NULL;
1518 }
1519 
1520 
1521 #undef __FUNCT__
1522 #define __FUNCT__ "PetscThreadFinalize_True"
1523 PetscErrorCode PetscThreadFinalize_True() {
1524   int i,ierr;
1525   void* jstatus;
1526 
1527   PetscFunctionBegin;
1528 
1529   if(PetscUseThreadPool) {
1530     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1531     /* join the threads */
1532     for(i=0; i<PetscMaxThreads; i++) {
1533       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1534       /* should check error */
1535     }
1536     free(BarrPoint);
1537     free(PetscThreadPoint);
1538   }
1539   else {
1540   }
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MainWait_True"
1546 void MainWait_True() {
1547   int ierr;
1548   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1549     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1550   }
1551   ierr = pthread_mutex_unlock(&job_true.mutex);
1552 }
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MainJob_True"
1556 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1557   int ierr;
1558   PetscErrorCode ijoberr = 0;
1559   if(PetscUseThreadPool) {
1560     MainWait();
1561     job_true.pfunc = pFunc;
1562     job_true.pdata = data;
1563     job_true.pbarr = &BarrPoint[n];
1564     job_true.iNumJobThreads = n;
1565     job_true.startJob = PETSC_TRUE;
1566     ierr = pthread_cond_broadcast(&job_true.cond);
1567     if(pFunc!=FuncFinish) {
1568       MainWait(); /* why wait after? guarantees that job gets done */
1569     }
1570   }
1571   else {
1572     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1573     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1574     PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1575     free(apThread);
1576   }
1577   if(ithreaderr) {
1578     ijoberr = ithreaderr;
1579   }
1580   return ijoberr;
1581 }
1582 /****  ****/
1583 #endif
1584 
1585 void* FuncFinish(void* arg) {
1586   PetscThreadGo = PETSC_FALSE;
1587   return(0);
1588 }
1589 
1590 #endif
1591