xref: /petsc/src/sys/objects/init.c (revision 51d315f7f51405b454a6b2ffbc3c7276a5e2a085)
/* Thread-pool support (tree/main/chain/true variants) added to PETSc initialization */
2e5c89e4eSSatish Balay /*
3e5c89e4eSSatish Balay 
4e5c89e4eSSatish Balay    This file defines part of the initialization of PETSc
5e5c89e4eSSatish Balay 
6e5c89e4eSSatish Balay   This file uses regular malloc and free because it cannot know
7e5c89e4eSSatish Balay   what malloc is being used until it has already processed the input.
8e5c89e4eSSatish Balay */
9e5c89e4eSSatish Balay 
10*51d315f7SKerry Stevens #define _GNU_SOURCE
11*51d315f7SKerry Stevens #include <sched.h>
12c6db04a5SJed Brown #include <petscsys.h>        /*I  "petscsys.h"   I*/
1351dcc849SKerry Stevens #include <pthread.h>
14*51d315f7SKerry Stevens #include <sys/sysinfo.h>
15*51d315f7SKerry Stevens #include <unistd.h>
16e5c89e4eSSatish Balay #if defined(PETSC_HAVE_STDLIB_H)
17e5c89e4eSSatish Balay #include <stdlib.h>
18e5c89e4eSSatish Balay #endif
19e5c89e4eSSatish Balay #if defined(PETSC_HAVE_MALLOC_H)
20e5c89e4eSSatish Balay #include <malloc.h>
21e5c89e4eSSatish Balay #endif
22555d055bSBarry Smith #if defined(PETSC_HAVE_VALGRIND)
23555d055bSBarry Smith #include <valgrind/valgrind.h>
24555d055bSBarry Smith #endif
25555d055bSBarry Smith 
26e5c89e4eSSatish Balay /* ------------------------Nasty global variables -------------------------------*/
27e5c89e4eSSatish Balay /*
28e5c89e4eSSatish Balay      Indicates if PETSc started up MPI, or it was
29e5c89e4eSSatish Balay    already started before PETSc was initialized.
30e5c89e4eSSatish Balay */
/* PETSC_TRUE if PetscInitialize() itself called MPI_Init() (and so must finalize it) */
PetscBool    PetscBeganMPI         = PETSC_FALSE;
PetscBool    PetscInitializeCalled = PETSC_FALSE;
PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
/* Set when -use_thread_pool is given; selects pooled threads over spawn-per-job */
PetscBool    PetscUseThreadPool    = PETSC_FALSE;
/* Cleared to tell pool threads to exit their work loops */
PetscBool    PetscThreadGo         = PETSC_TRUE;
PetscMPIInt  PetscGlobalRank = -1;
PetscMPIInt  PetscGlobalSize = -1;
/* Number of worker threads; overridden by the -thread_max option */
PetscMPIInt  PetscMaxThreads = 2;
/* Array of the pool's pthread handles */
pthread_t*   PetscThreadPoint;
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
/* First nonzero error code reported from inside a worker thread */
PetscErrorCode ithreaderr = 0;
/* Per-thread integer ids/arguments passed at thread creation */
int*         pVal;

/* Assumed cache-line size used to pad per-thread sync objects and avoid
   false sharing; used by 'chain', 'main' and 'tree' thread pools.
   NOTE(review): 64 bytes is typical for x86 — confirm for other targets. */
#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
/* Core index each worker thread is pinned to (set from -thread<i> options) */
int* ThreadCoreAffinity;
46*51d315f7SKerry Stevens 
/* Life-cycle of the current job as seen by the main thread; used by the
   'chain' and 'tree' thread pools */
typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;

/* Shared job descriptor for the 'tree' pool: per-thread mutex/condition
   pairs plus the kernel (pfunc) and per-thread argument array (pdata)
   currently being executed */
typedef struct {
  pthread_mutex_t** mutexarray;       /* one mutex per worker thread */
  pthread_cond_t**  cond1array;       /* per-thread "work available" conditions */
  pthread_cond_t** cond2array;        /* per-thread "thread ready" conditions */
  void* (*pfunc)(void*);              /* kernel each worker runs */
  void** pdata;                       /* pdata[i] is passed to worker i */
  PetscBool startJob;                 /* set by main to release the workers */
  estat eJobStat;                     /* progress of the current job */
  PetscBool** arrThreadStarted;       /* per-thread: has picked up the job */
  PetscBool** arrThreadReady;         /* per-thread: idle and waiting */
} sjob_tree;
sjob_tree job_tree;
/* Shared job descriptor for the 'main' pool (no job-status tracking needed:
   the main thread signals each worker directly) */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* Shared job descriptor for the 'chain' pool; same layout/fields as sjob_tree
   but workers signal each other in a chain rather than a tree */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t** cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
/* Shared job descriptor for the 'true' pool: a single mutex/condition pair
   guards the ready/job counters and a barrier synchronizes job completion */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;           /* barrier the job's threads meet at */
  int iNumJobThreads;                 /* threads requested for current job */
  int iNumReadyThreads;               /* threads currently idle in the pool */
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};

/* Condition the main thread blocks on while waiting for workers;
   used by 'true', 'chain' and 'tree' thread pools */
pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;
/* Raw backing storage for the cache-line-padded per-thread mutexes,
   conditions and flags above; used by 'chain', 'main' and 'tree' pools */
char* arrmutex;
char* arrcond1;
char* arrcond2;
char* arrstart;
char* arrready;
10051dcc849SKerry Stevens 
/* Dispatch table: these pointers are bound to one of the four pool
   implementations below, selected at startup by the -pool option */
void*          (*PetscThreadFunc)(void*) = NULL;          /* worker-thread main loop */
void*          (*PetscThreadInitialize)(PetscInt) = NULL; /* create the pool of N threads */
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;       /* shut the pool down */
void           (*MainWait)(void) = NULL;                  /* block until workers are idle */
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL; /* submit a job */
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/****  ****/

/* Sentinel kernel submitted to tell workers to terminate */
void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
136e5c89e4eSSatish Balay 
137e5c89e4eSSatish Balay #if defined(PETSC_USE_COMPLEX)
138e5c89e4eSSatish Balay #if defined(PETSC_COMPLEX_INSTANTIATE)
139e5c89e4eSSatish Balay template <> class std::complex<double>; /* instantiate complex template class */
140e5c89e4eSSatish Balay #endif
1412c876bd9SBarry Smith #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
1427087cfbeSBarry Smith MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
1437087cfbeSBarry Smith MPI_Datatype   MPI_C_COMPLEX;
1442c876bd9SBarry Smith #endif
1457087cfbeSBarry Smith PetscScalar    PETSC_i;
146e5c89e4eSSatish Balay #else
1477087cfbeSBarry Smith PetscScalar    PETSC_i = 0.0;
148e5c89e4eSSatish Balay #endif
149ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL___FLOAT128)
150c90a1750SBarry Smith MPI_Datatype   MPIU___FLOAT128 = 0;
151c90a1750SBarry Smith #endif
1527087cfbeSBarry Smith MPI_Datatype   MPIU_2SCALAR = 0;
1537087cfbeSBarry Smith MPI_Datatype   MPIU_2INT = 0;
15475567043SBarry Smith 
155e5c89e4eSSatish Balay /*
156e5c89e4eSSatish Balay      These are needed by petscbt.h
157e5c89e4eSSatish Balay */
158c6db04a5SJed Brown #include <petscbt.h>
1597087cfbeSBarry Smith char      _BT_mask = ' ';
1607087cfbeSBarry Smith char      _BT_c = ' ';
1617087cfbeSBarry Smith PetscInt  _BT_idx  = 0;
162e5c89e4eSSatish Balay 
163e5c89e4eSSatish Balay /*
164e5c89e4eSSatish Balay        Function that is called to display all error messages
165e5c89e4eSSatish Balay */
1667087cfbeSBarry Smith PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
1677087cfbeSBarry Smith PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
168238ccf28SShri Abhyankar #if defined(PETSC_HAVE_MATLAB_ENGINE)
1697087cfbeSBarry Smith PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
170238ccf28SShri Abhyankar #else
1717087cfbeSBarry Smith PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
172238ccf28SShri Abhyankar #endif
173bab1f7e6SVictor Minden /*
1748154be41SBarry Smith   This is needed to turn on/off cusp synchronization */
1758154be41SBarry Smith PetscBool   synchronizeCUSP = PETSC_FALSE;
176bab1f7e6SVictor Minden 
177e5c89e4eSSatish Balay /* ------------------------------------------------------------------------------*/
178e5c89e4eSSatish Balay /*
179e5c89e4eSSatish Balay    Optional file where all PETSc output from various prints is saved
180e5c89e4eSSatish Balay */
181e5c89e4eSSatish Balay FILE *petsc_history = PETSC_NULL;
182e5c89e4eSSatish Balay 
183e5c89e4eSSatish Balay #undef __FUNCT__
184f3dea69dSBarry Smith #define __FUNCT__ "PetscOpenHistoryFile"
1857087cfbeSBarry Smith PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
186e5c89e4eSSatish Balay {
187e5c89e4eSSatish Balay   PetscErrorCode ierr;
188e5c89e4eSSatish Balay   PetscMPIInt    rank,size;
189e5c89e4eSSatish Balay   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
190e5c89e4eSSatish Balay   char           version[256];
191e5c89e4eSSatish Balay 
192e5c89e4eSSatish Balay   PetscFunctionBegin;
193e5c89e4eSSatish Balay   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
194e5c89e4eSSatish Balay   if (!rank) {
195e5c89e4eSSatish Balay     char        arch[10];
196f56c2debSBarry Smith     int         err;
19788c29154SBarry Smith     PetscViewer viewer;
198f56c2debSBarry Smith 
199e5c89e4eSSatish Balay     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
200e5c89e4eSSatish Balay     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
201a523d312SBarry Smith     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
202e5c89e4eSSatish Balay     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
203e5c89e4eSSatish Balay     if (filename) {
204e5c89e4eSSatish Balay       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
205e5c89e4eSSatish Balay     } else {
206e5c89e4eSSatish Balay       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
207e5c89e4eSSatish Balay       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
208e5c89e4eSSatish Balay       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
209e5c89e4eSSatish Balay     }
210e5c89e4eSSatish Balay 
211e32f2f54SBarry Smith     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
212e5c89e4eSSatish Balay     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
213e5c89e4eSSatish Balay     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
214e5c89e4eSSatish Balay     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
215e5c89e4eSSatish Balay     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
21688c29154SBarry Smith     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
21788c29154SBarry Smith     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
2186bf464f9SBarry Smith     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
219e5c89e4eSSatish Balay     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
220f56c2debSBarry Smith     err = fflush(*fd);
221e32f2f54SBarry Smith     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
222e5c89e4eSSatish Balay   }
223e5c89e4eSSatish Balay   PetscFunctionReturn(0);
224e5c89e4eSSatish Balay }
225e5c89e4eSSatish Balay 
226e5c89e4eSSatish Balay #undef __FUNCT__
227f3dea69dSBarry Smith #define __FUNCT__ "PetscCloseHistoryFile"
2287087cfbeSBarry Smith PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
229e5c89e4eSSatish Balay {
230e5c89e4eSSatish Balay   PetscErrorCode ierr;
231e5c89e4eSSatish Balay   PetscMPIInt    rank;
232e5c89e4eSSatish Balay   char           date[64];
233f56c2debSBarry Smith   int            err;
234e5c89e4eSSatish Balay 
235e5c89e4eSSatish Balay   PetscFunctionBegin;
236e5c89e4eSSatish Balay   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
237e5c89e4eSSatish Balay   if (!rank) {
238e5c89e4eSSatish Balay     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
239e5c89e4eSSatish Balay     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
240e5c89e4eSSatish Balay     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
241e5c89e4eSSatish Balay     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
242f56c2debSBarry Smith     err = fflush(*fd);
243e32f2f54SBarry Smith     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
244f56c2debSBarry Smith     err = fclose(*fd);
245e32f2f54SBarry Smith     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
246e5c89e4eSSatish Balay   }
247e5c89e4eSSatish Balay   PetscFunctionReturn(0);
248e5c89e4eSSatish Balay }
249e5c89e4eSSatish Balay 
250e5c89e4eSSatish Balay /* ------------------------------------------------------------------------------*/
251e5c89e4eSSatish Balay 
252e5c89e4eSSatish Balay /*
253e5c89e4eSSatish Balay    This is ugly and probably belongs somewhere else, but I want to
254e5c89e4eSSatish Balay   be able to put a true MPI abort error handler with command line args.
255e5c89e4eSSatish Balay 
256e5c89e4eSSatish Balay     This is so MPI errors in the debugger will leave all the stack
2573c311c98SBarry Smith   frames. The default MP_Abort() cleans up and exits thus providing no useful information
2583c311c98SBarry Smith   in the debugger hence we call abort() instead of MPI_Abort().
259e5c89e4eSSatish Balay */
260e5c89e4eSSatish Balay 
261e5c89e4eSSatish Balay #undef __FUNCT__
262e5c89e4eSSatish Balay #define __FUNCT__ "Petsc_MPI_AbortOnError"
263e5c89e4eSSatish Balay void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
264e5c89e4eSSatish Balay {
265e5c89e4eSSatish Balay   PetscFunctionBegin;
2663c311c98SBarry Smith   (*PetscErrorPrintf)("MPI error %d\n",*flag);
267e5c89e4eSSatish Balay   abort();
268e5c89e4eSSatish Balay }
269e5c89e4eSSatish Balay 
270e5c89e4eSSatish Balay #undef __FUNCT__
271e5c89e4eSSatish Balay #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
272e5c89e4eSSatish Balay void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
273e5c89e4eSSatish Balay {
274e5c89e4eSSatish Balay   PetscErrorCode ierr;
275e5c89e4eSSatish Balay 
276e5c89e4eSSatish Balay   PetscFunctionBegin;
2773c311c98SBarry Smith   (*PetscErrorPrintf)("MPI error %d\n",*flag);
278e5c89e4eSSatish Balay   ierr = PetscAttachDebugger();
279e5c89e4eSSatish Balay   if (ierr) { /* hopeless so get out */
2803c311c98SBarry Smith     MPI_Abort(*comm,*flag);
281e5c89e4eSSatish Balay   }
282e5c89e4eSSatish Balay }
283e5c89e4eSSatish Balay 
284e5c89e4eSSatish Balay #undef __FUNCT__
285e5c89e4eSSatish Balay #define __FUNCT__ "PetscEnd"
286e5c89e4eSSatish Balay /*@C
287e5c89e4eSSatish Balay    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
288e5c89e4eSSatish Balay      wishes a clean exit somewhere deep in the program.
289e5c89e4eSSatish Balay 
290e5c89e4eSSatish Balay    Collective on PETSC_COMM_WORLD
291e5c89e4eSSatish Balay 
292e5c89e4eSSatish Balay    Options Database Keys are the same as for PetscFinalize()
293e5c89e4eSSatish Balay 
294e5c89e4eSSatish Balay    Level: advanced
295e5c89e4eSSatish Balay 
296e5c89e4eSSatish Balay    Note:
297e5c89e4eSSatish Balay    See PetscInitialize() for more general runtime options.
298e5c89e4eSSatish Balay 
29988c29154SBarry Smith .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
300e5c89e4eSSatish Balay @*/
3017087cfbeSBarry Smith PetscErrorCode  PetscEnd(void)
302e5c89e4eSSatish Balay {
303e5c89e4eSSatish Balay   PetscFunctionBegin;
304e5c89e4eSSatish Balay   PetscFinalize();
305e5c89e4eSSatish Balay   exit(0);
306e5c89e4eSSatish Balay   return 0;
307e5c89e4eSSatish Balay }
308e5c89e4eSSatish Balay 
309ace3abfcSBarry Smith PetscBool    PetscOptionsPublish = PETSC_FALSE;
31009573ac7SBarry Smith extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
311ace3abfcSBarry Smith extern PetscBool  petscsetmallocvisited;
312e5c89e4eSSatish Balay static char       emacsmachinename[256];
313e5c89e4eSSatish Balay 
314e5c89e4eSSatish Balay PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
315e5c89e4eSSatish Balay PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
316e5c89e4eSSatish Balay 
317e5c89e4eSSatish Balay #undef __FUNCT__
318e5c89e4eSSatish Balay #define __FUNCT__ "PetscSetHelpVersionFunctions"
319e5c89e4eSSatish Balay /*@C
320e5c89e4eSSatish Balay    PetscSetHelpVersionFunctions - Sets functions that print help and version information
321e5c89e4eSSatish Balay    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
322e5c89e4eSSatish Balay    This routine enables a "higher-level" package that uses PETSc to print its messages first.
323e5c89e4eSSatish Balay 
324e5c89e4eSSatish Balay    Input Parameter:
325e5c89e4eSSatish Balay +  help - the help function (may be PETSC_NULL)
326da93591fSBarry Smith -  version - the version function (may be PETSC_NULL)
327e5c89e4eSSatish Balay 
328e5c89e4eSSatish Balay    Level: developer
329e5c89e4eSSatish Balay 
330e5c89e4eSSatish Balay    Concepts: package help message
331e5c89e4eSSatish Balay 
332e5c89e4eSSatish Balay @*/
3337087cfbeSBarry Smith PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
334e5c89e4eSSatish Balay {
335e5c89e4eSSatish Balay   PetscFunctionBegin;
336e5c89e4eSSatish Balay   PetscExternalHelpFunction    = help;
337e5c89e4eSSatish Balay   PetscExternalVersionFunction = version;
338e5c89e4eSSatish Balay   PetscFunctionReturn(0);
339e5c89e4eSSatish Balay }
340e5c89e4eSSatish Balay 
341e5c89e4eSSatish Balay #undef __FUNCT__
342e5c89e4eSSatish Balay #define __FUNCT__ "PetscOptionsCheckInitial_Private"
3437087cfbeSBarry Smith PetscErrorCode  PetscOptionsCheckInitial_Private(void)
344e5c89e4eSSatish Balay {
345e5c89e4eSSatish Balay   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
346e5c89e4eSSatish Balay   MPI_Comm       comm = PETSC_COMM_WORLD;
347ace3abfcSBarry Smith   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
348e5c89e4eSSatish Balay   PetscErrorCode ierr;
349a6d0e24fSJed Brown   PetscReal      si;
350e5c89e4eSSatish Balay   int            i;
351e5c89e4eSSatish Balay   PetscMPIInt    rank;
352e5c89e4eSSatish Balay   char           version[256];
353e5c89e4eSSatish Balay 
354e5c89e4eSSatish Balay   PetscFunctionBegin;
355e5c89e4eSSatish Balay   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
356e5c89e4eSSatish Balay 
357e5c89e4eSSatish Balay   /*
358e5c89e4eSSatish Balay       Setup the memory management; support for tracing malloc() usage
359e5c89e4eSSatish Balay   */
3608bb29257SSatish Balay   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
36181b192fdSBarry Smith #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
362acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
363e5c89e4eSSatish Balay   if ((!flg2 || flg1) && !petscsetmallocvisited) {
364555d055bSBarry Smith #if defined(PETSC_HAVE_VALGRIND)
365555d055bSBarry Smith     if (flg2 || !(RUNNING_ON_VALGRIND)) {
366555d055bSBarry Smith       /* turn off default -malloc if valgrind is being used */
367555d055bSBarry Smith #endif
368e5c89e4eSSatish Balay       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
369555d055bSBarry Smith #if defined(PETSC_HAVE_VALGRIND)
370555d055bSBarry Smith     }
371555d055bSBarry Smith #endif
372e5c89e4eSSatish Balay   }
373e5c89e4eSSatish Balay #else
374acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
375acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
376e5c89e4eSSatish Balay   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
377e5c89e4eSSatish Balay #endif
378e5c89e4eSSatish Balay   if (flg3) {
379e5c89e4eSSatish Balay     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
380e5c89e4eSSatish Balay   }
38190d69ab7SBarry Smith   flg1 = PETSC_FALSE;
382acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
383e5c89e4eSSatish Balay   if (flg1) {
384e5c89e4eSSatish Balay     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
385e5c89e4eSSatish Balay     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
386e5c89e4eSSatish Balay   }
387e5c89e4eSSatish Balay 
38890d69ab7SBarry Smith   flg1 = PETSC_FALSE;
389acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
3907783f70dSSatish Balay   if (!flg1) {
39190d69ab7SBarry Smith     flg1 = PETSC_FALSE;
392acfcf0e5SJed Brown     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
3937783f70dSSatish Balay   }
394e5c89e4eSSatish Balay   if (flg1) {
395e5c89e4eSSatish Balay     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
396e5c89e4eSSatish Balay   }
397e5c89e4eSSatish Balay 
398e5c89e4eSSatish Balay   /*
399e5c89e4eSSatish Balay       Set the display variable for graphics
400e5c89e4eSSatish Balay   */
401e5c89e4eSSatish Balay   ierr = PetscSetDisplay();CHKERRQ(ierr);
402e5c89e4eSSatish Balay 
403e5c89e4eSSatish Balay   /*
40451dcc849SKerry Stevens       Determine whether user specified maximum number of threads
40551dcc849SKerry Stevens    */
40651dcc849SKerry Stevens   ierr = PetscOptionsHasName(PETSC_NULL,"-thread_max",&flg1);CHKERRQ(ierr);
40751dcc849SKerry Stevens   if(flg1) {
40851dcc849SKerry Stevens     ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
40951dcc849SKerry Stevens   }
41051dcc849SKerry Stevens 
41151dcc849SKerry Stevens   /*
41251dcc849SKerry Stevens       Determine whether to use thread pool
41351dcc849SKerry Stevens    */
41451dcc849SKerry Stevens   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
41551dcc849SKerry Stevens   if(flg1) {
41651dcc849SKerry Stevens     PetscUseThreadPool = PETSC_TRUE;
417*51d315f7SKerry Stevens     PetscInt N_CORES = get_nprocs();
418*51d315f7SKerry Stevens     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
419*51d315f7SKerry Stevens     char tstr[9];
420*51d315f7SKerry Stevens     char tbuf[2];
421*51d315f7SKerry Stevens     strcpy(tstr,"-thread");
422*51d315f7SKerry Stevens     for(i=0;i<PetscMaxThreads;i++) {
423*51d315f7SKerry Stevens       ThreadCoreAffinity[i] = i;  //default
424*51d315f7SKerry Stevens       sprintf(tbuf,"%d",i);
425*51d315f7SKerry Stevens       strcat(tstr,tbuf);
426*51d315f7SKerry Stevens       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
427*51d315f7SKerry Stevens       if(flg1) {
428*51d315f7SKerry Stevens         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
429*51d315f7SKerry Stevens         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; //check on the user
430*51d315f7SKerry Stevens       }
431*51d315f7SKerry Stevens       tstr[7] = '\0';
432*51d315f7SKerry Stevens     }
433*51d315f7SKerry Stevens     //get the thread pool type
434*51d315f7SKerry Stevens     PetscInt ipool = 0;
435*51d315f7SKerry Stevens     ierr = PetscOptionsGetInt(PETSC_NULL,"-pool",&ipool,PETSC_NULL);CHKERRQ(ierr);
436*51d315f7SKerry Stevens     switch(ipool) {
437*51d315f7SKerry Stevens     case 1:
438*51d315f7SKerry Stevens       PetscThreadFunc       = &PetscThreadFunc_Tree;
439*51d315f7SKerry Stevens       PetscThreadInitialize = &PetscThreadInitialize_Tree;
440*51d315f7SKerry Stevens       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
441*51d315f7SKerry Stevens       MainWait              = &MainWait_Tree;
442*51d315f7SKerry Stevens       MainJob               = &MainJob_Tree;
443*51d315f7SKerry Stevens       break;
444*51d315f7SKerry Stevens     case 2:
445*51d315f7SKerry Stevens       PetscThreadFunc       = &PetscThreadFunc_Main;
446*51d315f7SKerry Stevens       PetscThreadInitialize = &PetscThreadInitialize_Main;
447*51d315f7SKerry Stevens       PetscThreadFinalize   = &PetscThreadFinalize_Main;
448*51d315f7SKerry Stevens       MainWait              = &MainWait_Main;
449*51d315f7SKerry Stevens       MainJob               = &MainJob_Main;
450*51d315f7SKerry Stevens       break;
451*51d315f7SKerry Stevens     case 3:
452*51d315f7SKerry Stevens       PetscThreadFunc       = &PetscThreadFunc_Chain;
453*51d315f7SKerry Stevens       PetscThreadInitialize = &PetscThreadInitialize_Chain;
454*51d315f7SKerry Stevens       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
455*51d315f7SKerry Stevens       MainWait              = &MainWait_Chain;
456*51d315f7SKerry Stevens       MainJob               = &MainJob_Chain;
457*51d315f7SKerry Stevens       break;
458*51d315f7SKerry Stevens     default:
459*51d315f7SKerry Stevens       PetscThreadFunc       = &PetscThreadFunc_True;
460*51d315f7SKerry Stevens       PetscThreadInitialize = &PetscThreadInitialize_True;
461*51d315f7SKerry Stevens       PetscThreadFinalize   = &PetscThreadFinalize_True;
462*51d315f7SKerry Stevens       MainWait              = &MainWait_True;
463*51d315f7SKerry Stevens       MainJob               = &MainJob_True;
464*51d315f7SKerry Stevens       break;
465*51d315f7SKerry Stevens     }
46651dcc849SKerry Stevens   }
4670ca81413SKerry Stevens   PetscThreadInitialize(PetscMaxThreads);
46851dcc849SKerry Stevens   /*
469e5c89e4eSSatish Balay       Print the PETSc version information
470e5c89e4eSSatish Balay   */
471e5c89e4eSSatish Balay   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
472e5c89e4eSSatish Balay   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
473e5c89e4eSSatish Balay   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
474e5c89e4eSSatish Balay   if (flg1 || flg2 || flg3){
475e5c89e4eSSatish Balay 
476e5c89e4eSSatish Balay     /*
477e5c89e4eSSatish Balay        Print "higher-level" package version message
478e5c89e4eSSatish Balay     */
479e5c89e4eSSatish Balay     if (PetscExternalVersionFunction) {
480e5c89e4eSSatish Balay       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
481e5c89e4eSSatish Balay     }
482e5c89e4eSSatish Balay 
483a523d312SBarry Smith     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
484e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
485e5c89e4eSSatish Balay ------------------------------\n");CHKERRQ(ierr);
486e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
487e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
488e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
48984e42920SBarry Smith     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
490e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
491e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
492e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
493e5c89e4eSSatish Balay ------------------------------\n");CHKERRQ(ierr);
494e5c89e4eSSatish Balay   }
495e5c89e4eSSatish Balay 
496e5c89e4eSSatish Balay   /*
497e5c89e4eSSatish Balay        Print "higher-level" package help message
498e5c89e4eSSatish Balay   */
499e5c89e4eSSatish Balay   if (flg3){
500e5c89e4eSSatish Balay     if (PetscExternalHelpFunction) {
501e5c89e4eSSatish Balay       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
502e5c89e4eSSatish Balay     }
503e5c89e4eSSatish Balay   }
504e5c89e4eSSatish Balay 
505e5c89e4eSSatish Balay   /*
506e5c89e4eSSatish Balay       Setup the error handling
507e5c89e4eSSatish Balay   */
50890d69ab7SBarry Smith   flg1 = PETSC_FALSE;
509acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
510cb9801acSJed Brown   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
51190d69ab7SBarry Smith   flg1 = PETSC_FALSE;
512acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
513cb9801acSJed Brown   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
51490d69ab7SBarry Smith   flg1 = PETSC_FALSE;
515acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
516e5c89e4eSSatish Balay   if (flg1) {
517e5c89e4eSSatish Balay     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
518e5c89e4eSSatish Balay   }
51990d69ab7SBarry Smith   flg1 = PETSC_FALSE;
520acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
521cb9801acSJed Brown   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
52296cc47afSJed Brown   flg1 = PETSC_FALSE;
523acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
52496cc47afSJed Brown   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
525e5c89e4eSSatish Balay 
526e5c89e4eSSatish Balay   /*
527e5c89e4eSSatish Balay       Setup debugger information
528e5c89e4eSSatish Balay   */
529e5c89e4eSSatish Balay   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
530e5c89e4eSSatish Balay   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
531e5c89e4eSSatish Balay   if (flg1) {
532e5c89e4eSSatish Balay     MPI_Errhandler err_handler;
533e5c89e4eSSatish Balay 
534e5c89e4eSSatish Balay     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
535e5c89e4eSSatish Balay     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
536e5c89e4eSSatish Balay     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
537e5c89e4eSSatish Balay     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
538e5c89e4eSSatish Balay   }
5395e96ac45SJed Brown   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
5405e96ac45SJed Brown   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
541e5c89e4eSSatish Balay   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
542e5c89e4eSSatish Balay   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
543e5c89e4eSSatish Balay   if (flg1 || flg2) {
544e5c89e4eSSatish Balay     PetscMPIInt    size;
545e5c89e4eSSatish Balay     PetscInt       lsize,*nodes;
546e5c89e4eSSatish Balay     MPI_Errhandler err_handler;
547e5c89e4eSSatish Balay     /*
548e5c89e4eSSatish Balay        we have to make sure that all processors have opened
549e5c89e4eSSatish Balay        connections to all other processors, otherwise once the
550e5c89e4eSSatish Balay        debugger has stated it is likely to receive a SIGUSR1
551e5c89e4eSSatish Balay        and kill the program.
552e5c89e4eSSatish Balay     */
553e5c89e4eSSatish Balay     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
554e5c89e4eSSatish Balay     if (size > 2) {
555533163c2SBarry Smith       PetscMPIInt dummy = 0;
556e5c89e4eSSatish Balay       MPI_Status  status;
557e5c89e4eSSatish Balay       for (i=0; i<size; i++) {
558e5c89e4eSSatish Balay         if (rank != i) {
559e5c89e4eSSatish Balay           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
560e5c89e4eSSatish Balay         }
561e5c89e4eSSatish Balay       }
562e5c89e4eSSatish Balay       for (i=0; i<size; i++) {
563e5c89e4eSSatish Balay         if (rank != i) {
564e5c89e4eSSatish Balay           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
565e5c89e4eSSatish Balay         }
566e5c89e4eSSatish Balay       }
567e5c89e4eSSatish Balay     }
568e5c89e4eSSatish Balay     /* check if this processor node should be in debugger */
569e5c89e4eSSatish Balay     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
570e5c89e4eSSatish Balay     lsize = size;
571e5c89e4eSSatish Balay     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
572e5c89e4eSSatish Balay     if (flag) {
573e5c89e4eSSatish Balay       for (i=0; i<lsize; i++) {
574e5c89e4eSSatish Balay         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
575e5c89e4eSSatish Balay       }
576e5c89e4eSSatish Balay     }
577e5c89e4eSSatish Balay     if (!flag) {
578e5c89e4eSSatish Balay       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
579e5c89e4eSSatish Balay       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
580e5c89e4eSSatish Balay       if (flg1) {
581e5c89e4eSSatish Balay         ierr = PetscAttachDebugger();CHKERRQ(ierr);
582e5c89e4eSSatish Balay       } else {
583e5c89e4eSSatish Balay         ierr = PetscStopForDebugger();CHKERRQ(ierr);
584e5c89e4eSSatish Balay       }
585e5c89e4eSSatish Balay       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
586e5c89e4eSSatish Balay       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
587e5c89e4eSSatish Balay     }
588e5c89e4eSSatish Balay     ierr = PetscFree(nodes);CHKERRQ(ierr);
589e5c89e4eSSatish Balay   }
590e5c89e4eSSatish Balay 
591e5c89e4eSSatish Balay   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
592cb9801acSJed Brown   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
593e5c89e4eSSatish Balay 
59493ba235fSBarry Smith #if defined(PETSC_USE_SOCKET_VIEWER)
59522b84c2fSbcordonn   /*
59622b84c2fSbcordonn     Activates new sockets for zope if needed
59722b84c2fSbcordonn   */
59884ab5442Sbcordonn   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
599d8c6e182Sbcordonn   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
6006dc8fec2Sbcordonn   if (flgz){
60122b84c2fSbcordonn     int  sockfd;
602f1384234SBarry Smith     char hostname[256];
60322b84c2fSbcordonn     char username[256];
6046dc8fec2Sbcordonn     int  remoteport = 9999;
6059c4c166aSBarry Smith 
60684ab5442Sbcordonn     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
60784ab5442Sbcordonn     if (!hostname[0]){
6089c4c166aSBarry Smith       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
6099c4c166aSBarry Smith     }
61022b84c2fSbcordonn     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
6119c4c166aSBarry Smith     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
61222b84c2fSbcordonn     PETSC_ZOPEFD = fdopen(sockfd, "w");
61322b84c2fSbcordonn     if (flgzout){
61422b84c2fSbcordonn       PETSC_STDOUT = PETSC_ZOPEFD;
615606f100bSbcordonn       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
6166dc8fec2Sbcordonn       fprintf(PETSC_STDOUT, "<<<start>>>");
6179c4c166aSBarry Smith     } else {
618d8c6e182Sbcordonn       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
619d8c6e182Sbcordonn       fprintf(PETSC_ZOPEFD, "<<<start>>>");
6209c4c166aSBarry Smith     }
6219c4c166aSBarry Smith   }
62293ba235fSBarry Smith #endif
623ffc871a5SBarry Smith #if defined(PETSC_USE_SERVER)
624ffc871a5SBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
625ffc871a5SBarry Smith   if (flgz){
626ffc871a5SBarry Smith     PetscInt port = PETSC_DECIDE;
627ffc871a5SBarry Smith     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
628ffc871a5SBarry Smith     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
629ffc871a5SBarry Smith   }
630ffc871a5SBarry Smith #endif
6316dc8fec2Sbcordonn 
632e5c89e4eSSatish Balay   /*
633e5c89e4eSSatish Balay         Setup profiling and logging
634e5c89e4eSSatish Balay   */
6356cf91177SBarry Smith #if defined (PETSC_USE_INFO)
6368bb29257SSatish Balay   {
637e5c89e4eSSatish Balay     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
6386cf91177SBarry Smith     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
6398bb29257SSatish Balay     if (flg1 && logname[0]) {
640fcc2139eSBarry Smith       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
6418bb29257SSatish Balay     } else if (flg1) {
642fcc2139eSBarry Smith       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
643e5c89e4eSSatish Balay     }
644e5c89e4eSSatish Balay   }
645865f6aa8SSatish Balay #endif
646865f6aa8SSatish Balay #if defined(PETSC_USE_LOG)
647865f6aa8SSatish Balay   mname[0] = 0;
648f3dea69dSBarry Smith   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
649865f6aa8SSatish Balay   if (flg1) {
650865f6aa8SSatish Balay     if (mname[0]) {
651f3dea69dSBarry Smith       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
652865f6aa8SSatish Balay     } else {
653f3dea69dSBarry Smith       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
654865f6aa8SSatish Balay     }
655865f6aa8SSatish Balay   }
656e5c89e4eSSatish Balay #if defined(PETSC_HAVE_MPE)
65790d69ab7SBarry Smith   flg1 = PETSC_FALSE;
658fcfd50ebSBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
659e5c89e4eSSatish Balay   if (flg1) PetscLogMPEBegin();
660e5c89e4eSSatish Balay #endif
66190d69ab7SBarry Smith   flg1 = PETSC_FALSE;
66290d69ab7SBarry Smith   flg2 = PETSC_FALSE;
66390d69ab7SBarry Smith   flg3 = PETSC_FALSE;
664acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
665acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
666d44e083bSSatish Balay   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
6679f7b6320SBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
668e5c89e4eSSatish Balay   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
6699f7b6320SBarry Smith   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
670e5c89e4eSSatish Balay 
671e5c89e4eSSatish Balay   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
672e5c89e4eSSatish Balay   if (flg1) {
673e5c89e4eSSatish Balay     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
674e5c89e4eSSatish Balay     FILE *file;
675e5c89e4eSSatish Balay     if (mname[0]) {
676e5c89e4eSSatish Balay       sprintf(name,"%s.%d",mname,rank);
677e5c89e4eSSatish Balay       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
678e5c89e4eSSatish Balay       file = fopen(fname,"w");
679f3dea69dSBarry Smith       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
680e5c89e4eSSatish Balay     } else {
681da9f1d6bSBarry Smith       file = PETSC_STDOUT;
682e5c89e4eSSatish Balay     }
683e5c89e4eSSatish Balay     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
684e5c89e4eSSatish Balay   }
685e5c89e4eSSatish Balay #endif
686e5c89e4eSSatish Balay 
687e5c89e4eSSatish Balay   /*
688e5c89e4eSSatish Balay       Setup building of stack frames for all function calls
689e5c89e4eSSatish Balay   */
69063d6bff0SBarry Smith #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
691e5c89e4eSSatish Balay   ierr = PetscStackCreate();CHKERRQ(ierr);
692e5c89e4eSSatish Balay #endif
693e5c89e4eSSatish Balay 
694acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
695e5c89e4eSSatish Balay 
696e5c89e4eSSatish Balay   /*
697e5c89e4eSSatish Balay        Print basic help message
698e5c89e4eSSatish Balay   */
699e5c89e4eSSatish Balay   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
700e5c89e4eSSatish Balay   if (flg1) {
701e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
702301d30feSBarry Smith     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
703301d30feSBarry Smith     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
704301d30feSBarry Smith     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
705e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
706e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
707e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
708e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
709e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
710e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
711e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
712e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
713e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
714e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
715e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
716e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
717e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
718e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
719e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
720e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
721e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
722e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
723e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
7244161f2a3SBarry Smith     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
7254161f2a3SBarry Smith     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
726e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
727e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
728e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
729e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
730e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
731e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
732a8c7a070SBarry Smith     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
733e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
734e5c89e4eSSatish Balay #if defined(PETSC_USE_LOG)
735e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
736e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
737e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
738e5c89e4eSSatish Balay #if defined(PETSC_HAVE_MPE)
739e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
740e5c89e4eSSatish Balay #endif
7416cf91177SBarry Smith     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
742e5c89e4eSSatish Balay #endif
743e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
744e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
745e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
746e5c89e4eSSatish Balay     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
747e5c89e4eSSatish Balay   }
748e5c89e4eSSatish Balay 
749a6d0e24fSJed Brown   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
750e5c89e4eSSatish Balay   if (flg1) {
751e5c89e4eSSatish Balay     ierr = PetscSleep(si);CHKERRQ(ierr);
752e5c89e4eSSatish Balay   }
753e5c89e4eSSatish Balay 
7546cf91177SBarry Smith   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
755e5c89e4eSSatish Balay   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
756e5c89e4eSSatish Balay   if (f) {
7576cf91177SBarry Smith     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
758e5c89e4eSSatish Balay   }
759827f890bSBarry Smith 
7608154be41SBarry Smith #if defined(PETSC_HAVE_CUSP)
761c97f9302SBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
76273113deaSBarry Smith   if (flg3) flg1 = PETSC_TRUE;
76373113deaSBarry Smith   else flg1 = PETSC_FALSE;
7648154be41SBarry Smith   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
7658154be41SBarry Smith   if (flg1) synchronizeCUSP = PETSC_TRUE;
766bab1f7e6SVictor Minden #endif
767192daf7cSBarry Smith 
768e5c89e4eSSatish Balay   PetscFunctionReturn(0);
769e5c89e4eSSatish Balay }
770df413903SBarry Smith 
771*51d315f7SKerry Stevens /**** 'Tree' Thread Pool Functions ****/
772*51d315f7SKerry Stevens void* PetscThreadFunc_Tree(void* arg) {
773*51d315f7SKerry Stevens   PetscErrorCode iterr;
774*51d315f7SKerry Stevens   int icorr,ierr;
775*51d315f7SKerry Stevens   int* pId = (int*)arg;
776*51d315f7SKerry Stevens   int ThreadId = *pId,Mary = 2,i,SubWorker;
777*51d315f7SKerry Stevens   PetscBool PeeOn;
778*51d315f7SKerry Stevens   cpu_set_t mset;
779*51d315f7SKerry Stevens 
780*51d315f7SKerry Stevens   icorr = ThreadCoreAffinity[ThreadId];
781*51d315f7SKerry Stevens   CPU_ZERO(&mset);
782*51d315f7SKerry Stevens   CPU_SET(icorr,&mset);
783*51d315f7SKerry Stevens   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
784*51d315f7SKerry Stevens 
785*51d315f7SKerry Stevens   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
786*51d315f7SKerry Stevens     PeeOn = PETSC_TRUE;
787*51d315f7SKerry Stevens   }
788*51d315f7SKerry Stevens   else {
789*51d315f7SKerry Stevens     PeeOn = PETSC_FALSE;
790*51d315f7SKerry Stevens   }
791*51d315f7SKerry Stevens   if(PeeOn==PETSC_FALSE) {
792*51d315f7SKerry Stevens     //check your subordinates, wait for them to be ready
793*51d315f7SKerry Stevens     for(i=1;i<=Mary;i++) {
794*51d315f7SKerry Stevens       SubWorker = Mary*ThreadId+i;
795*51d315f7SKerry Stevens       if(SubWorker<PetscMaxThreads) {
796*51d315f7SKerry Stevens         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
797*51d315f7SKerry Stevens         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
798*51d315f7SKerry Stevens           //upon entry, automically releases the lock and blocks
799*51d315f7SKerry Stevens           //upon return, has the lock
800*51d315f7SKerry Stevens           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
801*51d315f7SKerry Stevens         }
802*51d315f7SKerry Stevens         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
803*51d315f7SKerry Stevens       }
804*51d315f7SKerry Stevens     }
805*51d315f7SKerry Stevens     //your subordinates are now ready
806*51d315f7SKerry Stevens   }
807*51d315f7SKerry Stevens   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
808*51d315f7SKerry Stevens   //update your ready status
809*51d315f7SKerry Stevens   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
810*51d315f7SKerry Stevens   if(ThreadId==0) {
811*51d315f7SKerry Stevens     job_tree.eJobStat = JobCompleted;
812*51d315f7SKerry Stevens     //signal main
813*51d315f7SKerry Stevens     ierr = pthread_cond_signal(&main_cond);
814*51d315f7SKerry Stevens   }
815*51d315f7SKerry Stevens   else {
816*51d315f7SKerry Stevens     //tell your boss that you're ready to work
817*51d315f7SKerry Stevens     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
818*51d315f7SKerry Stevens   }
819*51d315f7SKerry Stevens   //the while loop needs to have an exit
820*51d315f7SKerry Stevens   //the 'main' thread can terminate all the threads by performing a broadcast
821*51d315f7SKerry Stevens   //and calling FuncFinish
822*51d315f7SKerry Stevens   while(PetscThreadGo) {
823*51d315f7SKerry Stevens     //need to check the condition to ensure we don't have to wait
824*51d315f7SKerry Stevens     //waiting when you don't have to causes problems
825*51d315f7SKerry Stevens     //also need to check the condition to ensure proper handling of spurious wakeups
826*51d315f7SKerry Stevens     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
827*51d315f7SKerry Stevens         //upon entry, automically releases the lock and blocks
828*51d315f7SKerry Stevens         //upon return, has the lock
829*51d315f7SKerry Stevens         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
830*51d315f7SKerry Stevens 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
831*51d315f7SKerry Stevens 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
832*51d315f7SKerry Stevens     }
833*51d315f7SKerry Stevens     if(ThreadId==0) {
834*51d315f7SKerry Stevens       job_tree.startJob = PETSC_FALSE;
835*51d315f7SKerry Stevens       job_tree.eJobStat = ThreadsWorking;
836*51d315f7SKerry Stevens     }
837*51d315f7SKerry Stevens     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
838*51d315f7SKerry Stevens     if(PeeOn==PETSC_FALSE) {
839*51d315f7SKerry Stevens       //tell your subordinates it's time to get to work
840*51d315f7SKerry Stevens       for(i=1; i<=Mary; i++) {
841*51d315f7SKerry Stevens 	SubWorker = Mary*ThreadId+i;
842*51d315f7SKerry Stevens         if(SubWorker<PetscMaxThreads) {
843*51d315f7SKerry Stevens           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
844*51d315f7SKerry Stevens         }
845*51d315f7SKerry Stevens       }
846*51d315f7SKerry Stevens     }
847*51d315f7SKerry Stevens     //do your job
848*51d315f7SKerry Stevens     if(job_tree.pdata==NULL) {
849*51d315f7SKerry Stevens       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
850*51d315f7SKerry Stevens     }
851*51d315f7SKerry Stevens     else {
852*51d315f7SKerry Stevens       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
853*51d315f7SKerry Stevens     }
854*51d315f7SKerry Stevens     if(iterr!=0) {
855*51d315f7SKerry Stevens       ithreaderr = 1;
856*51d315f7SKerry Stevens     }
857*51d315f7SKerry Stevens     if(PetscThreadGo) {
858*51d315f7SKerry Stevens       //reset job, get ready for more
859*51d315f7SKerry Stevens       if(PeeOn==PETSC_FALSE) {
860*51d315f7SKerry Stevens         //check your subordinates, waiting for them to be ready
861*51d315f7SKerry Stevens 	//how do you know for a fact that a given subordinate has actually started?
862*51d315f7SKerry Stevens 	for(i=1;i<=Mary;i++) {
863*51d315f7SKerry Stevens 	  SubWorker = Mary*ThreadId+i;
864*51d315f7SKerry Stevens           if(SubWorker<PetscMaxThreads) {
865*51d315f7SKerry Stevens             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
866*51d315f7SKerry Stevens             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
867*51d315f7SKerry Stevens               //upon entry, automically releases the lock and blocks
868*51d315f7SKerry Stevens               //upon return, has the lock
869*51d315f7SKerry Stevens               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
870*51d315f7SKerry Stevens             }
871*51d315f7SKerry Stevens             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
872*51d315f7SKerry Stevens           }
873*51d315f7SKerry Stevens 	}
874*51d315f7SKerry Stevens         //your subordinates are now ready
875*51d315f7SKerry Stevens       }
876*51d315f7SKerry Stevens       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
877*51d315f7SKerry Stevens       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
878*51d315f7SKerry Stevens       if(ThreadId==0) {
879*51d315f7SKerry Stevens 	job_tree.eJobStat = JobCompleted; //root thread: last thread to complete, guaranteed!
880*51d315f7SKerry Stevens         //root thread signals 'main'
881*51d315f7SKerry Stevens         ierr = pthread_cond_signal(&main_cond);
882*51d315f7SKerry Stevens       }
883*51d315f7SKerry Stevens       else {
884*51d315f7SKerry Stevens         //signal your boss before you go to sleep
885*51d315f7SKerry Stevens         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
886*51d315f7SKerry Stevens       }
887*51d315f7SKerry Stevens     }
888*51d315f7SKerry Stevens   }
889*51d315f7SKerry Stevens   return NULL;
890*51d315f7SKerry Stevens }
891*51d315f7SKerry Stevens 
892*51d315f7SKerry Stevens #undef __FUNCT__
893*51d315f7SKerry Stevens #define __FUNCT__ "PetscThreadInitialize_Tree"
894*51d315f7SKerry Stevens void* PetscThreadInitialize_Tree(PetscInt N) {
895*51d315f7SKerry Stevens   PetscInt i,ierr;
896*51d315f7SKerry Stevens   int status;
897*51d315f7SKerry Stevens 
898*51d315f7SKerry Stevens   if(PetscUseThreadPool) {
899*51d315f7SKerry Stevens     size_t Val1 = (size_t)CACHE_LINE_SIZE;
900*51d315f7SKerry Stevens     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
901*51d315f7SKerry Stevens     arrmutex = (char*)memalign(Val1,Val2);
902*51d315f7SKerry Stevens     arrcond1 = (char*)memalign(Val1,Val2);
903*51d315f7SKerry Stevens     arrcond2 = (char*)memalign(Val1,Val2);
904*51d315f7SKerry Stevens     arrstart = (char*)memalign(Val1,Val2);
905*51d315f7SKerry Stevens     arrready = (char*)memalign(Val1,Val2);
906*51d315f7SKerry Stevens     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
907*51d315f7SKerry Stevens     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
908*51d315f7SKerry Stevens     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
909*51d315f7SKerry Stevens     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
910*51d315f7SKerry Stevens     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
911*51d315f7SKerry Stevens     //initialize job structure
912*51d315f7SKerry Stevens     for(i=0; i<PetscMaxThreads; i++) {
913*51d315f7SKerry Stevens       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
914*51d315f7SKerry Stevens       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
915*51d315f7SKerry Stevens       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
916*51d315f7SKerry Stevens       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
917*51d315f7SKerry Stevens       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
918*51d315f7SKerry Stevens     }
919*51d315f7SKerry Stevens     for(i=0; i<PetscMaxThreads; i++) {
920*51d315f7SKerry Stevens       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
921*51d315f7SKerry Stevens       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
922*51d315f7SKerry Stevens       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
923*51d315f7SKerry Stevens       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
924*51d315f7SKerry Stevens       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
925*51d315f7SKerry Stevens     }
926*51d315f7SKerry Stevens     job_tree.pfunc = NULL;
927*51d315f7SKerry Stevens     job_tree.pdata = (void**)malloc(N*sizeof(void*));
928*51d315f7SKerry Stevens     job_tree.startJob = PETSC_FALSE;
929*51d315f7SKerry Stevens     job_tree.eJobStat = JobInitiated;
930*51d315f7SKerry Stevens     pVal = (int*)malloc(N*sizeof(int));
931*51d315f7SKerry Stevens     //allocate memory in the heap for the thread structure
932*51d315f7SKerry Stevens     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
933*51d315f7SKerry Stevens     //create threads
934*51d315f7SKerry Stevens     for(i=0; i<N; i++) {
935*51d315f7SKerry Stevens       pVal[i] = i;
936*51d315f7SKerry Stevens       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
937*51d315f7SKerry Stevens       //error check
938*51d315f7SKerry Stevens     }
939*51d315f7SKerry Stevens   }
940*51d315f7SKerry Stevens   else {
941*51d315f7SKerry Stevens     //do nothing
942*51d315f7SKerry Stevens   }
943*51d315f7SKerry Stevens   return NULL;
944*51d315f7SKerry Stevens }
945*51d315f7SKerry Stevens 
946*51d315f7SKerry Stevens #undef __FUNCT__
947*51d315f7SKerry Stevens #define __FUNCT__ "PetscThreadFinalize_Tree"
948*51d315f7SKerry Stevens PetscErrorCode PetscThreadFinalize_Tree() {
949*51d315f7SKerry Stevens   int i,ierr;
950*51d315f7SKerry Stevens   void* jstatus;
951*51d315f7SKerry Stevens 
952*51d315f7SKerry Stevens   PetscFunctionBegin;
953*51d315f7SKerry Stevens 
954*51d315f7SKerry Stevens   if(PetscUseThreadPool) {
955*51d315f7SKerry Stevens     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
956*51d315f7SKerry Stevens     //join the threads
957*51d315f7SKerry Stevens     for(i=0; i<PetscMaxThreads; i++) {
958*51d315f7SKerry Stevens       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
959*51d315f7SKerry Stevens       //do error checking
960*51d315f7SKerry Stevens     }
961*51d315f7SKerry Stevens     free(PetscThreadPoint);
962*51d315f7SKerry Stevens     free(arrmutex);
963*51d315f7SKerry Stevens     free(arrcond1);
964*51d315f7SKerry Stevens     free(arrcond2);
965*51d315f7SKerry Stevens     free(arrstart);
966*51d315f7SKerry Stevens     free(arrready);
967*51d315f7SKerry Stevens     free(job_tree.pdata);
968*51d315f7SKerry Stevens     free(pVal);
969*51d315f7SKerry Stevens   }
970*51d315f7SKerry Stevens   else {
971*51d315f7SKerry Stevens   }
972*51d315f7SKerry Stevens   PetscFunctionReturn(0);
973*51d315f7SKerry Stevens }
974*51d315f7SKerry Stevens 
975*51d315f7SKerry Stevens #undef __FUNCT__
976*51d315f7SKerry Stevens #define __FUNCT__ "MainWait_Tree"
977*51d315f7SKerry Stevens void MainWait_Tree() {
978*51d315f7SKerry Stevens   int ierr;
979*51d315f7SKerry Stevens   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
980*51d315f7SKerry Stevens   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
981*51d315f7SKerry Stevens     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
982*51d315f7SKerry Stevens   }
983*51d315f7SKerry Stevens   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
984*51d315f7SKerry Stevens }
985*51d315f7SKerry Stevens 
986*51d315f7SKerry Stevens #undef __FUNCT__
987*51d315f7SKerry Stevens #define __FUNCT__ "MainJob_Tree"
988*51d315f7SKerry Stevens PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
989*51d315f7SKerry Stevens   int i,ierr;
990*51d315f7SKerry Stevens   PetscErrorCode ijoberr = 0;
991*51d315f7SKerry Stevens   if(PetscUseThreadPool) {
992*51d315f7SKerry Stevens     MainWait();
993*51d315f7SKerry Stevens     job_tree.pfunc = pFunc;
994*51d315f7SKerry Stevens     job_tree.pdata = data;
995*51d315f7SKerry Stevens     job_tree.startJob = PETSC_TRUE;
996*51d315f7SKerry Stevens     for(i=0; i<PetscMaxThreads; i++) {
997*51d315f7SKerry Stevens       *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
998*51d315f7SKerry Stevens     }
999*51d315f7SKerry Stevens     job_tree.eJobStat = JobInitiated;
1000*51d315f7SKerry Stevens     ierr = pthread_cond_signal(job_tree.cond2array[0]);
1001*51d315f7SKerry Stevens     if(pFunc!=FuncFinish) {
1002*51d315f7SKerry Stevens       MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
1003*51d315f7SKerry Stevens     }
1004*51d315f7SKerry Stevens   }
1005*51d315f7SKerry Stevens   else {
1006*51d315f7SKerry Stevens     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1007*51d315f7SKerry Stevens     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1008*51d315f7SKerry Stevens     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1009*51d315f7SKerry Stevens     free(apThread);
1010*51d315f7SKerry Stevens   }
1011*51d315f7SKerry Stevens   if(ithreaderr) {
1012*51d315f7SKerry Stevens     ijoberr = ithreaderr;
1013*51d315f7SKerry Stevens   }
1014*51d315f7SKerry Stevens   return ijoberr;
1015*51d315f7SKerry Stevens }
1016*51d315f7SKerry Stevens /****  ****/
1017*51d315f7SKerry Stevens 
1018*51d315f7SKerry Stevens /**** 'Main' Thread Pool Functions ****/
/*
   Worker body for the 'main' pool variant: every worker talks directly to
   the 'main' (boss) thread.  Per-thread state (arrThreadReady[ThreadId]) is
   guarded by mutexarray[ThreadId]; cond1 signals boss<-worker readiness,
   cond2 signals boss->worker "go".  arg points at this worker's index.
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;

  /* pin this worker to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  //update your ready status
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  //tell the BOSS that you're ready to work before you go to sleep
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  //the while loop needs to have an exit
  //the 'main' thread can terminate all the threads by performing a broadcast
  //and calling FuncFinish
  while(PetscThreadGo) {
    //need to check the condition to ensure we don't have to wait
    //waiting when you don't have to causes problems
    //also need to check the condition to ensure proper handling of spurious wakeups
    //(the boss clears arrThreadReady[ThreadId] when it posts a job)
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
        //upon entry, atomically releases the lock and blocks
        //upon return, has the lock
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
	//*(job_main.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    //do your job; a NULL pdata means the job takes no per-thread argument
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1; /* report failure; 'main' picks this up in MainJob */
    }
    if(PetscThreadGo) {
      //reset job, get ready for more
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      //tell the BOSS that you're ready to work before you go to sleep
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1071*51d315f7SKerry Stevens 
#undef __FUNCT__
#define __FUNCT__ "PetscThreadInitialize_Main"
/*
   Create the 'main' pool: allocate one cache line per thread for each
   synchronization object (to avoid false sharing), initialize the mutexes
   and condition variables, then spawn N workers running PetscThreadFunc.

   NOTE(review): the memalign/malloc results and the pthread_create/init
   return codes are all unchecked -- a failed allocation here leads to a
   crash later.  Also assumes CACHE_LINE_SIZE >= sizeof(pthread_mutex_t)
   and sizeof(pthread_cond_t) -- confirm for the target platform.
*/
void* PetscThreadInitialize_Main(PetscInt N) {
  PetscInt i,ierr;
  int status;

  if(PetscUseThreadPool) {
    size_t Val1 = (size_t)CACHE_LINE_SIZE;
    size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
    /* one cache line per thread for each object, to avoid false sharing */
    arrmutex = (char*)memalign(Val1,Val2);
    arrcond1 = (char*)memalign(Val1,Val2);
    arrcond2 = (char*)memalign(Val1,Val2);
    arrstart = (char*)memalign(Val1,Val2);
    arrready = (char*)memalign(Val1,Val2);
    job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
    job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
    job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
    job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
    //initialize job structure: point each per-thread slot at its cache line
    for(i=0; i<PetscMaxThreads; i++) {
      job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
      job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
      job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
      job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
    }
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
      ierr = pthread_cond_init(job_main.cond1array[i],NULL);
      ierr = pthread_cond_init(job_main.cond2array[i],NULL);
      *(job_main.arrThreadReady[i])    = PETSC_FALSE;
    }
    job_main.pfunc = NULL;
    job_main.pdata = (void**)malloc(N*sizeof(void*));
    pVal = (int*)malloc(N*sizeof(int));
    //allocate memory in the heap for the thread structure
    PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
    //create threads; pVal[i] gives each worker a stable copy of its index
    for(i=0; i<N; i++) {
      pVal[i] = i;
      status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
      //error check
    }
  }
  else {
  }
  return NULL;
}
1119*51d315f7SKerry Stevens 
1120*51d315f7SKerry Stevens #undef __FUNCT__
1121*51d315f7SKerry Stevens #define __FUNCT__ "PetscThreadFinalize_Main"
1122*51d315f7SKerry Stevens PetscErrorCode PetscThreadFinalize_Main() {
1123*51d315f7SKerry Stevens   int i,ierr;
1124*51d315f7SKerry Stevens   void* jstatus;
1125*51d315f7SKerry Stevens 
1126*51d315f7SKerry Stevens   PetscFunctionBegin;
1127*51d315f7SKerry Stevens 
1128*51d315f7SKerry Stevens   if(PetscUseThreadPool) {
1129*51d315f7SKerry Stevens     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1130*51d315f7SKerry Stevens     //join the threads
1131*51d315f7SKerry Stevens     for(i=0; i<PetscMaxThreads; i++) {
1132*51d315f7SKerry Stevens       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1133*51d315f7SKerry Stevens       //do error checking
1134*51d315f7SKerry Stevens     }
1135*51d315f7SKerry Stevens     free(PetscThreadPoint);
1136*51d315f7SKerry Stevens     free(arrmutex);
1137*51d315f7SKerry Stevens     free(arrcond1);
1138*51d315f7SKerry Stevens     free(arrcond2);
1139*51d315f7SKerry Stevens     free(arrstart);
1140*51d315f7SKerry Stevens     free(arrready);
1141*51d315f7SKerry Stevens     free(job_main.pdata);
1142*51d315f7SKerry Stevens     free(pVal);
1143*51d315f7SKerry Stevens   }
1144*51d315f7SKerry Stevens   else {
1145*51d315f7SKerry Stevens   }
1146*51d315f7SKerry Stevens   PetscFunctionReturn(0);
1147*51d315f7SKerry Stevens }
1148*51d315f7SKerry Stevens 
1149*51d315f7SKerry Stevens #undef __FUNCT__
1150*51d315f7SKerry Stevens #define __FUNCT__ "MainWait_Main"
1151*51d315f7SKerry Stevens void MainWait_Main() {
1152*51d315f7SKerry Stevens   int i,ierr;
1153*51d315f7SKerry Stevens   for(i=0; i<PetscMaxThreads; i++) {
1154*51d315f7SKerry Stevens     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1155*51d315f7SKerry Stevens     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1156*51d315f7SKerry Stevens       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1157*51d315f7SKerry Stevens     }
1158*51d315f7SKerry Stevens     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1159*51d315f7SKerry Stevens   }
1160*51d315f7SKerry Stevens }
1161*51d315f7SKerry Stevens 
1162*51d315f7SKerry Stevens #undef __FUNCT__
1163*51d315f7SKerry Stevens #define __FUNCT__ "MainJob_Main"
1164*51d315f7SKerry Stevens PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1165*51d315f7SKerry Stevens   int i,ierr;
1166*51d315f7SKerry Stevens   PetscErrorCode ijoberr = 0;
1167*51d315f7SKerry Stevens   if(PetscUseThreadPool) {
1168*51d315f7SKerry Stevens     MainWait(); //you know everyone is waiting to be signalled!
1169*51d315f7SKerry Stevens     job_main.pfunc = pFunc;
1170*51d315f7SKerry Stevens     job_main.pdata = data;
1171*51d315f7SKerry Stevens     for(i=0; i<PetscMaxThreads; i++) {
1172*51d315f7SKerry Stevens       *(job_main.arrThreadReady[i]) = PETSC_FALSE; //why do this?  suppose you get into MainWait first
1173*51d315f7SKerry Stevens     }
1174*51d315f7SKerry Stevens     //tell the threads to go to work
1175*51d315f7SKerry Stevens     for(i=0; i<PetscMaxThreads; i++) {
1176*51d315f7SKerry Stevens       ierr = pthread_cond_signal(job_main.cond2array[i]);
1177*51d315f7SKerry Stevens     }
1178*51d315f7SKerry Stevens     if(pFunc!=FuncFinish) {
1179*51d315f7SKerry Stevens       MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
1180*51d315f7SKerry Stevens     }
1181*51d315f7SKerry Stevens   }
1182*51d315f7SKerry Stevens   else {
1183*51d315f7SKerry Stevens     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1184*51d315f7SKerry Stevens     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1185*51d315f7SKerry Stevens     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1186*51d315f7SKerry Stevens     free(apThread);
1187*51d315f7SKerry Stevens   }
1188*51d315f7SKerry Stevens   if(ithreaderr) {
1189*51d315f7SKerry Stevens     ijoberr = ithreaderr;
1190*51d315f7SKerry Stevens   }
1191*51d315f7SKerry Stevens   return ijoberr;
1192*51d315f7SKerry Stevens }
1193*51d315f7SKerry Stevens /****  ****/
1194*51d315f7SKerry Stevens 
1195*51d315f7SKerry Stevens /**** Chain Thread Functions ****/
/*
   Worker body for the chain-organized pool.  Thread i is the "boss" of
   thread i+1 (its SubWorker); thread 0 is the foreman that reports to
   'main', and the last thread (PeeOn) has no subworker.  "Go" wakeups
   cascade DOWN the chain via cond2array; readiness reports cascade UP via
   cond1array.  arg points at this worker's index.
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;
  cpu_set_t mset;

  /* pin this worker to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* the last thread in the chain has no subworker to manage */
  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    //check your subordinate, wait for him to be ready
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      //upon entry, atomically releases the lock and blocks
      //upon return, has the lock
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    //your subordinate is now ready
  }
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  //update your ready status
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    /* foreman becomes ready only after the whole chain below it is ready */
    job_chain.eJobStat = JobCompleted;
    //signal main
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    //tell your boss that you're ready to work
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  //the while loop needs to have an exit
  //the 'main' thread can terminate all the threads by performing a broadcast
  //and calling FuncFinish
  while(PetscThreadGo) {
    //need to check the condition to ensure we don't have to wait
    //waiting when you don't have to causes problems
    //also need to check the condition to ensure proper handling of spurious wakeups
    //(note: the mutex is still held here, carried in from before the loop
    // or re-acquired at the bottom of the previous iteration)
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
        //upon entry, atomically releases the lock and blocks
        //upon return, has the lock
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      /* foreman records that the job has been picked up */
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      //tell your subworker it's time to get to work
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    //do your job; a NULL pdata means the job takes no per-thread argument
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1; /* report failure; 'main' picks this up in MainJob */
    }
    if(PetscThreadGo) {
      //reset job, get ready for more
      if(PeeOn==PETSC_FALSE) {
        //check your subordinate, wait for him to be ready
	//how do you know for a fact that your subordinate has actually started?
        //(that is what arrThreadStarted tracks: ready alone could be the
        // stale pre-job state)
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          //upon entry, atomically releases the lock and blocks
          //upon return, has the lock
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        //your subordinate is now ready
      }
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_chain.eJobStat = JobCompleted; //foreman: last thread to complete, guaranteed!
        //root thread (foreman) signals 'main'
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        //signal your boss before you go to sleep
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1301*51d315f7SKerry Stevens 
#undef __FUNCT__
#define __FUNCT__ "PetscThreadInitialize_Chain"
/*
   Create the chain pool: allocate one cache line per thread for each
   synchronization object (to avoid false sharing), initialize the mutexes,
   condition variables and status flags, then spawn N workers running
   PetscThreadFunc.

   NOTE(review): the memalign/malloc results and the pthread_create/init
   return codes are all unchecked.  Also assumes CACHE_LINE_SIZE >=
   sizeof(pthread_mutex_t) and sizeof(pthread_cond_t) -- confirm for the
   target platform.
*/
void* PetscThreadInitialize_Chain(PetscInt N) {
  PetscInt i,ierr;
  int status;

  if(PetscUseThreadPool) {
    size_t Val1 = (size_t)CACHE_LINE_SIZE;
    size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
    /* one cache line per thread for each object, to avoid false sharing */
    arrmutex = (char*)memalign(Val1,Val2);
    arrcond1 = (char*)memalign(Val1,Val2);
    arrcond2 = (char*)memalign(Val1,Val2);
    arrstart = (char*)memalign(Val1,Val2);
    arrready = (char*)memalign(Val1,Val2);
    job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
    job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
    job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
    job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
    job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
    //initialize job structure: point each per-thread slot at its cache line
    for(i=0; i<PetscMaxThreads; i++) {
      job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
      job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
      job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
      job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
      job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
    }
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
      ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
      ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
      *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
      *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
    }
    job_chain.pfunc = NULL;
    job_chain.pdata = (void**)malloc(N*sizeof(void*));
    job_chain.startJob = PETSC_FALSE;
    job_chain.eJobStat = JobInitiated;
    pVal = (int*)malloc(N*sizeof(int));
    //allocate memory in the heap for the thread structure
    PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
    //create threads; pVal[i] gives each worker a stable copy of its index
    for(i=0; i<N; i++) {
      pVal[i] = i;
      status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
      //error check
    }
  }
  else {
  }
  return NULL;
}
1354*51d315f7SKerry Stevens 
1355*51d315f7SKerry Stevens 
1356*51d315f7SKerry Stevens #undef __FUNCT__
1357*51d315f7SKerry Stevens #define __FUNCT__ "PetscThreadFinalize_Chain"
1358*51d315f7SKerry Stevens PetscErrorCode PetscThreadFinalize_Chain() {
1359*51d315f7SKerry Stevens   int i,ierr;
1360*51d315f7SKerry Stevens   void* jstatus;
1361*51d315f7SKerry Stevens 
1362*51d315f7SKerry Stevens   PetscFunctionBegin;
1363*51d315f7SKerry Stevens 
1364*51d315f7SKerry Stevens   if(PetscUseThreadPool) {
1365*51d315f7SKerry Stevens     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1366*51d315f7SKerry Stevens     //join the threads
1367*51d315f7SKerry Stevens     for(i=0; i<PetscMaxThreads; i++) {
1368*51d315f7SKerry Stevens       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1369*51d315f7SKerry Stevens       //do error checking
1370*51d315f7SKerry Stevens     }
1371*51d315f7SKerry Stevens     free(PetscThreadPoint);
1372*51d315f7SKerry Stevens     free(arrmutex);
1373*51d315f7SKerry Stevens     free(arrcond1);
1374*51d315f7SKerry Stevens     free(arrcond2);
1375*51d315f7SKerry Stevens     free(arrstart);
1376*51d315f7SKerry Stevens     free(arrready);
1377*51d315f7SKerry Stevens     free(job_chain.pdata);
1378*51d315f7SKerry Stevens     free(pVal);
1379*51d315f7SKerry Stevens   }
1380*51d315f7SKerry Stevens   else {
1381*51d315f7SKerry Stevens   }
1382*51d315f7SKerry Stevens   PetscFunctionReturn(0);
1383*51d315f7SKerry Stevens }
1384*51d315f7SKerry Stevens 
1385*51d315f7SKerry Stevens #undef __FUNCT__
1386*51d315f7SKerry Stevens #define __FUNCT__ "MainWait_Chain"
1387*51d315f7SKerry Stevens void MainWait_Chain() {
1388*51d315f7SKerry Stevens   int ierr;
1389*51d315f7SKerry Stevens   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1390*51d315f7SKerry Stevens   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1391*51d315f7SKerry Stevens     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1392*51d315f7SKerry Stevens   }
1393*51d315f7SKerry Stevens   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1394*51d315f7SKerry Stevens }
1395*51d315f7SKerry Stevens 
#undef __FUNCT__
#define __FUNCT__ "MainJob_Chain"
/*
   Hand a job (pFunc applied to data[i] on pool thread i) to the chain
   pool, or -- when the pool is disabled -- spawn n one-shot threads.
   Only the foreman (thread 0) is signalled; the wakeup then cascades down
   the chain inside PetscThreadFunc_Chain.

   Returns nonzero if any worker reported failure via ithreaderr.

   NOTE(review): job_chain.pfunc/pdata/startJob/eJobStat and the
   arrThreadStarted flags are written WITHOUT holding the mutexes the
   workers read them under; this relies on MainWait() having parked every
   worker first -- confirm, or lock around the updates.
   NOTE(review): the malloc in the non-pool branch is unchecked.
*/
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait(); /* all workers must be idle before the job slots are reused */
    job_chain.pfunc = pFunc;
    job_chain.pdata = data;
    job_chain.startJob = PETSC_TRUE;
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_chain.eJobStat = JobInitiated;
    /* wake only the foreman; it signals its subworker, and so on down the chain */
    ierr = pthread_cond_signal(job_chain.cond2array[0]);
    if(pFunc!=FuncFinish) {
      MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
    }
  }
  else {
    /* no pool: create n short-lived threads for this one job */
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr; /* a worker failed; propagate the error */
  }
  return ijoberr;
}
1426*51d315f7SKerry Stevens /****  ****/
1427*51d315f7SKerry Stevens 
1428*51d315f7SKerry Stevens /**** True Thread Functions ****/
/*
   Worker body for the 'true' pool variant: all workers share one mutex,
   one condition variable and the counters iNumReadyThreads /
   iNumJobThreads; a barrier (job_true.pbarr) makes the team finish each
   job together.  arg points at this worker's index.

   NOTE(review): job_true.pfunc/pdata are read AFTER the mutex is released;
   this relies on 'main' not touching them until the whole team re-registers
   as ready -- confirm against MainJob_True.
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;

  /* pin this worker to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    /* last worker to start up tells 'main' the pool is fully ready */
    ierr = pthread_cond_signal(&main_cond);
  }
  //the while loop needs to have an exit
  //the 'main' thread can terminate all the threads by performing a broadcast
  //and calling FuncFinish
  while(PetscThreadGo) {
    //need to check the condition to ensure we don't have to wait
    //waiting when you don't have to causes problems
    //also need to wait if another thread sneaks in and messes with the predicate
    //(the mutex is held here: carried in from startup or re-acquired below)
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      //upon entry, atomically releases the lock and blocks
      //upon return, has the lock
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* claim a job slot: slots are handed out in the order workers wake up */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* do the job; a NULL pdata means the job takes no per-thread argument */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      ithreaderr = 1; /* report failure; 'main' picks this up in MainJob */
    }
    //the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
    //what happens if a thread finishes before they all start? BAD!
    //what happens if a thread finishes before any else start? BAD!
    pthread_barrier_wait(job_true.pbarr); //ensures all threads are finished
    //reset job
    if(PetscThreadGo) {
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
	//signal the 'main' thread that the job is done! (only done once)
	ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
148851dcc849SKerry Stevens 
#undef __FUNCT__
#define __FUNCT__ "PetscThreadInitialize_True"
/*
   Create the 'true' pool: spawn N workers running PetscThreadFunc and
   pre-build one barrier per possible team size (BarrPoint[k] is a k-way
   barrier, k = 1..N).

   NOTE(review): all malloc results and the pthread_create /
   pthread_barrier_init return codes are unchecked.
   NOTE(review): the job_true.pdata allocation appears to be overwritten by
   MainJob (which installs the caller's data pointer) -- verify it is not
   simply leaked.
*/
void* PetscThreadInitialize_True(PetscInt N) {
  PetscInt i;
  int status;

  if(PetscUseThreadPool) {
    pVal = (int*)malloc(N*sizeof(int));
    //allocate memory in the heap for the thread structure
    PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
    BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); //BarrPoint[0] makes no sense, don't use it!
    job_true.pdata = (void**)malloc(N*sizeof(void*));
    for(i=0; i<N; i++) {
      pVal[i] = i; /* stable per-thread copy of the index, passed by address */
      status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
      //error check to ensure proper thread creation
      status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
      //error check
    }
  }
  else {
  }
  return NULL;
}
151351dcc849SKerry Stevens 
1514f09cb4aaSKerry Stevens 
1515f09cb4aaSKerry Stevens #undef __FUNCT__
1516*51d315f7SKerry Stevens #define __FUNCT__ "PetscThreadFinalize_True"
1517*51d315f7SKerry Stevens PetscErrorCode PetscThreadFinalize_True() {
151851dcc849SKerry Stevens   int i,ierr;
151951dcc849SKerry Stevens   void* jstatus;
152051dcc849SKerry Stevens 
152151dcc849SKerry Stevens   PetscFunctionBegin;
15220ca81413SKerry Stevens 
15230ca81413SKerry Stevens   if(PetscUseThreadPool) {
15240ca81413SKerry Stevens     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
152551dcc849SKerry Stevens     //join the threads
152651dcc849SKerry Stevens     for(i=0; i<PetscMaxThreads; i++) {
152751dcc849SKerry Stevens       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
152851dcc849SKerry Stevens       //do error checking
152951dcc849SKerry Stevens     }
153051dcc849SKerry Stevens     free(BarrPoint);
153151dcc849SKerry Stevens     free(PetscThreadPoint);
15320ca81413SKerry Stevens   }
15330ca81413SKerry Stevens   else {
15340ca81413SKerry Stevens   }
153551dcc849SKerry Stevens   PetscFunctionReturn(0);
153651dcc849SKerry Stevens }
153751dcc849SKerry Stevens 
1538f09cb4aaSKerry Stevens #undef __FUNCT__
1539*51d315f7SKerry Stevens #define __FUNCT__ "MainWait_True"
1540*51d315f7SKerry Stevens void MainWait_True() {
154151dcc849SKerry Stevens   int ierr;
1542*51d315f7SKerry Stevens   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1543*51d315f7SKerry Stevens     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
154451dcc849SKerry Stevens   }
1545*51d315f7SKerry Stevens   ierr = pthread_mutex_unlock(&job_true.mutex);
154651dcc849SKerry Stevens }
154751dcc849SKerry Stevens 
#undef __FUNCT__
#define __FUNCT__ "MainJob_True"
/*
   MainJob_True - Hands one parallel job (pFunc applied per thread with data)
   to the thread pool, or -- when no pool is in use -- runs it on n freshly
   spawned threads, and returns the accumulated thread error status.

   NOTE(review): the job_true fields are written here without holding
   job_true.mutex; presumably the preceding MainWait() guarantees all workers
   are idle so no worker reads them concurrently -- confirm before reuse.
*/
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
  int ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait(); /* block until every pool thread is idle and no job is pending */
    job_true.pfunc = pFunc;
    job_true.pdata = data; /* NOTE(review): repoints pdata away from the array malloc'd in PetscThreadInitialize_True, which is then never freed */
    job_true.pbarr = &BarrPoint[n]; /* barrier sized for exactly n participants */
    job_true.iNumJobThreads = n;
    job_true.startJob = PETSC_TRUE;
    ierr = pthread_cond_broadcast(&job_true.cond); /* wake the pool threads */
    if(pFunc!=FuncFinish) {
      MainWait(); //why wait after? guarantees that job gets done
    }
  }
  else {
    /* no pool: spawn, run, and reap n one-shot threads for this job
       NOTE(review): malloc result is used unchecked */
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
    free(apThread);
  }
  /* fold any error a worker recorded in the global ithreaderr into the
     return value (NOTE(review): ithreaderr is not reset between jobs) */
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
1576*51d315f7SKerry Stevens /****  ****/
157751dcc849SKerry Stevens 
157851dcc849SKerry Stevens void* FuncFinish(void* arg) {
157951dcc849SKerry Stevens   PetscThreadGo = PETSC_FALSE;
15800ca81413SKerry Stevens   return(0);
158151dcc849SKerry Stevens }
1582