//new kds file - implements all thread pool versions
/*

   This file defines part of the initialization of PETSc

   This file uses regular malloc and free because it cannot know
   what malloc is being used until it has already processed the input.
*/

#define _GNU_SOURCE               /* needed for CPU_SET/sched_setaffinity from <sched.h> */
#include <sched.h>
#include <petscsys.h>        /*I "petscsys.h" I*/
#include <pthread.h>
#include <sys/sysinfo.h>          /* get_nprocs() */
#include <unistd.h>
#if defined(PETSC_HAVE_STDLIB_H)
#include <stdlib.h>
#endif
#if defined(PETSC_HAVE_MALLOC_H)
#include <malloc.h>
#endif
#if defined(PETSC_HAVE_VALGRIND)
#include <valgrind/valgrind.h>
#endif

/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool   PetscBeganMPI         = PETSC_FALSE;
PetscBool   PetscInitializeCalled = PETSC_FALSE;
PetscBool   PetscFinalizeCalled   = PETSC_FALSE;
PetscBool   PetscUseThreadPool    = PETSC_FALSE;  /* set when -use_thread_pool is given */
PetscBool   PetscThreadGo         = PETSC_TRUE;   /* cleared to make worker threads exit their loops */
PetscMPIInt PetscGlobalRank       = -1;
PetscMPIInt PetscGlobalSize       = -1;
PetscMPIInt PetscMaxThreads       = 2;            /* default pool size; overridden by -thread_max */
pthread_t*  PetscThreadPoint;
pthread_barrier_t* BarrPoint;     //used by 'true' thread pool
PetscErrorCode ithreaderr = 0;    /* sticky flag: set nonzero when any worker's job function fails */
int*        pVal;

#define CACHE_LINE_SIZE 64        //used by 'chain', 'main','tree' thread pools
int* ThreadCoreAffinity;          /* per-thread core id; filled from -thread<i> options */

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat; //used by 'chain','tree' thread pool

/* Shared state for the 'tree' pool: thread 0 is the root, each thread signals
   its boss via cond1array and is released by its boss via cond2array. */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);          /* job function all workers run */
  void** pdata;                   /* per-thread argument array (or NULL) */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* Shared state for the 'main' pool: the main thread signals every worker directly. */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* Shared state for the 'chain' pool: thread i wakes thread i+1. */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
/* Shared state for the 'true' pool: a single mutex/cond pair plus a barrier. */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t  cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};

pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER; //used by 'true', 'chain','tree' thread pools
/* Raw storage carved into cache-line-sized slots for the per-thread sync objects. */
char* arrmutex; //used by 'chain','main','tree' thread pools
char* arrcond1; //used by 'chain','main','tree' thread pools
char* arrcond2; //used by 'chain','main','tree' thread pools
char* arrstart; //used by 'chain','main','tree' thread pools
char* arrready; //used by 'chain','main','tree' thread pools

/* Function Pointers: bound to one of the four pool implementations at startup */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void* PetscThreadFunc_Tree(void*);
void* PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void* PetscThreadFunc_Main(void*);
void* PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void* PetscThreadFunc_Chain(void*);
void* PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void* PetscThreadFunc_True(void*);
void* PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** ****/

void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);

#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
MPI_Datatype MPI_C_DOUBLE_COMPLEX;
MPI_Datatype MPI_C_COMPLEX;
#endif
PetscScalar PETSC_i;
#else
PetscScalar PETSC_i = 0.0;
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype MPIU___FLOAT128 = 0;
#endif
MPI_Datatype MPIU_2SCALAR = 0;
MPI_Datatype MPIU_2INT = 0;

/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
char     _BT_mask = ' ';
char     _BT_c    = ' ';
PetscInt _BT_idx  = 0;

/*
     Function that is called to display all error messages
*/
PetscErrorCode (*PetscErrorPrintf)(const char [],...) = PetscErrorPrintfDefault;
/* NOTE: initializer ("= PetscHelpPrintfDefault;") continues on the next line of the file */
PetscErrorCode (*PetscHelpPrintf)(MPI_Comm,const char [],...)
= PetscHelpPrintfDefault; 168 #if defined(PETSC_HAVE_MATLAB_ENGINE) 169 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintf_Matlab; 170 #else 171 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintfDefault; 172 #endif 173 /* 174 This is needed to turn on/off cusp synchronization */ 175 PetscBool synchronizeCUSP = PETSC_FALSE; 176 177 /* ------------------------------------------------------------------------------*/ 178 /* 179 Optional file where all PETSc output from various prints is saved 180 */ 181 FILE *petsc_history = PETSC_NULL; 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "PetscOpenHistoryFile" 185 PetscErrorCode PetscOpenHistoryFile(const char filename[],FILE **fd) 186 { 187 PetscErrorCode ierr; 188 PetscMPIInt rank,size; 189 char pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64]; 190 char version[256]; 191 192 PetscFunctionBegin; 193 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 194 if (!rank) { 195 char arch[10]; 196 int err; 197 PetscViewer viewer; 198 199 ierr = PetscGetArchType(arch,10);CHKERRQ(ierr); 200 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 201 ierr = PetscGetVersion(version,256);CHKERRQ(ierr); 202 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 203 if (filename) { 204 ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr); 205 } else { 206 ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr); 207 ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr); 208 ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr); 209 } 210 211 *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname); 212 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 213 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr); 214 ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr); 215 ierr = 
PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr); 216 ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr); 217 ierr = PetscOptionsView(viewer);CHKERRQ(ierr); 218 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 219 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 220 err = fflush(*fd); 221 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 222 } 223 PetscFunctionReturn(0); 224 } 225 226 #undef __FUNCT__ 227 #define __FUNCT__ "PetscCloseHistoryFile" 228 PetscErrorCode PetscCloseHistoryFile(FILE **fd) 229 { 230 PetscErrorCode ierr; 231 PetscMPIInt rank; 232 char date[64]; 233 int err; 234 235 PetscFunctionBegin; 236 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 237 if (!rank) { 238 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 239 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 240 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr); 241 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 242 err = fflush(*fd); 243 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 244 err = fclose(*fd); 245 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file"); 246 } 247 PetscFunctionReturn(0); 248 } 249 250 /* ------------------------------------------------------------------------------*/ 251 252 /* 253 This is ugly and probably belongs somewhere else, but I want to 254 be able to put a true MPI abort error handler with command line args. 255 256 This is so MPI errors in the debugger will leave all the stack 257 frames. The default MP_Abort() cleans up and exits thus providing no useful information 258 in the debugger hence we call abort() instead of MPI_Abort(). 
*/

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/* MPI error handler that prints the error code and calls abort() (not MPI_Abort())
   so that a debugger retains all stack frames at the point of failure. */
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();
}

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/* MPI error handler that tries to attach the debugger; if that fails it
   falls back to MPI_Abort() since nothing else can be done. */
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}

#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode PetscEnd(void)
{
  PetscFunctionBegin;
  PetscFinalize();
  exit(0);              /* never returns; the return below only silences compilers */
  return 0;
}

PetscBool PetscOptionsPublish = PETSC_FALSE;
extern PetscErrorCode PetscSetUseTrMalloc_Private(void);
extern PetscBool petscsetmallocvisited;
static char emacsmachinename[256];  /* target machine for -on_error_emacs */

/* optional hooks so a higher-level package can print its own version/help first */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm) = 0;

#undef __FUNCT__
#define __FUNCT__ "PetscSetHelpVersionFunctions"
/*@C
   PetscSetHelpVersionFunctions - Sets functions that print help and version information
   before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
   This routine enables a "higher-level" package that uses PETSc to print its messages first.
323 324 Input Parameter: 325 + help - the help function (may be PETSC_NULL) 326 - version - the version function (may be PETSC_NULL) 327 328 Level: developer 329 330 Concepts: package help message 331 332 @*/ 333 PetscErrorCode PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm)) 334 { 335 PetscFunctionBegin; 336 PetscExternalHelpFunction = help; 337 PetscExternalVersionFunction = version; 338 PetscFunctionReturn(0); 339 } 340 341 #undef __FUNCT__ 342 #define __FUNCT__ "PetscOptionsCheckInitial_Private" 343 PetscErrorCode PetscOptionsCheckInitial_Private(void) 344 { 345 char string[64],mname[PETSC_MAX_PATH_LEN],*f; 346 MPI_Comm comm = PETSC_COMM_WORLD; 347 PetscBool flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout; 348 PetscErrorCode ierr; 349 PetscReal si; 350 int i; 351 PetscMPIInt rank; 352 char version[256]; 353 354 PetscFunctionBegin; 355 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 356 357 /* 358 Setup the memory management; support for tracing malloc() usage 359 */ 360 ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr); 361 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD) 362 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr); 363 if ((!flg2 || flg1) && !petscsetmallocvisited) { 364 #if defined(PETSC_HAVE_VALGRIND) 365 if (flg2 || !(RUNNING_ON_VALGRIND)) { 366 /* turn off default -malloc if valgrind is being used */ 367 #endif 368 ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr); 369 #if defined(PETSC_HAVE_VALGRIND) 370 } 371 #endif 372 } 373 #else 374 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr); 375 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr); 376 if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);} 377 #endif 378 if (flg3) { 379 ierr = PetscMallocSetDumpLog();CHKERRQ(ierr); 380 } 381 flg1 = 
PETSC_FALSE; 382 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr); 383 if (flg1) { 384 ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr); 385 ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr); 386 } 387 388 flg1 = PETSC_FALSE; 389 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr); 390 if (!flg1) { 391 flg1 = PETSC_FALSE; 392 ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr); 393 } 394 if (flg1) { 395 ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr); 396 } 397 398 /* 399 Set the display variable for graphics 400 */ 401 ierr = PetscSetDisplay();CHKERRQ(ierr); 402 403 /* 404 Determine whether user specified maximum number of threads 405 */ 406 ierr = PetscOptionsHasName(PETSC_NULL,"-thread_max",&flg1);CHKERRQ(ierr); 407 if(flg1) { 408 ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr); 409 } 410 411 /* 412 Determine whether to use thread pool 413 */ 414 ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr); 415 if(flg1) { 416 PetscUseThreadPool = PETSC_TRUE; 417 PetscInt N_CORES = get_nprocs(); 418 ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int)); 419 char tstr[9]; 420 char tbuf[2]; 421 strcpy(tstr,"-thread"); 422 for(i=0;i<PetscMaxThreads;i++) { 423 ThreadCoreAffinity[i] = i; //default 424 sprintf(tbuf,"%d",i); 425 strcat(tstr,tbuf); 426 ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr); 427 if(flg1) { 428 ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr); 429 ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; //check on the user 430 } 431 tstr[7] = '\0'; 432 } 433 //get the thread pool type 434 PetscInt ipool = 0; 435 ierr = PetscOptionsGetInt(PETSC_NULL,"-pool",&ipool,PETSC_NULL);CHKERRQ(ierr); 436 switch(ipool) { 437 case 1: 438 PetscThreadFunc = &PetscThreadFunc_Tree; 439 PetscThreadInitialize = &PetscThreadInitialize_Tree; 
PetscThreadFinalize   = &PetscThreadFinalize_Tree;
      MainWait              = &MainWait_Tree;
      MainJob               = &MainJob_Tree;
      break;
    case 2:
      PetscThreadFunc       = &PetscThreadFunc_Main;
      PetscThreadInitialize = &PetscThreadInitialize_Main;
      PetscThreadFinalize   = &PetscThreadFinalize_Main;
      MainWait              = &MainWait_Main;
      MainJob               = &MainJob_Main;
      break;
    case 3:
      PetscThreadFunc       = &PetscThreadFunc_Chain;
      PetscThreadInitialize = &PetscThreadInitialize_Chain;
      PetscThreadFinalize   = &PetscThreadFinalize_Chain;
      MainWait              = &MainWait_Chain;
      MainJob               = &MainJob_Chain;
      break;
    default:
      /* pool type 0 or unrecognized: use the 'true' thread pool */
      PetscThreadFunc       = &PetscThreadFunc_True;
      PetscThreadInitialize = &PetscThreadInitialize_True;
      PetscThreadFinalize   = &PetscThreadFinalize_True;
      MainWait              = &MainWait_True;
      MainJob               = &MainJob_True;
      break;
    }
  }
  else {
    //need to define these in the case on 'no threads' or 'thread create/destroy'
    //could take any of the above versions
    PetscThreadInitialize = &PetscThreadInitialize_True;
    PetscThreadFinalize   = &PetscThreadFinalize_True;
    MainJob               = &MainJob_True;
  }
  PetscThreadInitialize(PetscMaxThreads);
  /*
      Print the PETSc version information
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
  if (flg1 || flg2 || flg3){

    /*
       Print "higher-level" package version message
    */
    if (PetscExternalVersionFunction) {
      ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
    }

    ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
------------------------------\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
------------------------------\n");CHKERRQ(ierr);
  }

  /*
      Print "higher-level" package help message
  */
  if (flg3){
    if (PetscExternalHelpFunction) {
      ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
    }
  }

  /*
      Setup the error handling
  */
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {
    ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
  }
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}

  /*
      Setup debugger information
  */
  ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
  ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
  if (flg1) {
    MPI_Errhandler err_handler;

    ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
    ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
    ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
    ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
  }
  ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
  if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
  ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
  if (flg1 || flg2) {
    PetscMPIInt    size;
    PetscInt       lsize,*nodes;
    MPI_Errhandler err_handler;
    /*
       we have to make sure that all processors have opened
       connections to all other processors, otherwise once the
       debugger has stated it is likely to receive a SIGUSR1
       and kill the program.
    */
    ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
    if (size > 2) {
      PetscMPIInt dummy = 0;
      MPI_Status  status;
      for (i=0; i<size; i++) {
        if (rank != i) {
          ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
        }
      }
      for (i=0; i<size; i++) {
        if (rank != i) {
          ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
        }
      }
    }
    /* check if this processor node should be in debugger */
    ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
    lsize = size;
    ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
    if (flag) {
      for (i=0; i<lsize; i++) {
        if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
      }
    }
    if (!flag) {
      ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
      ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
      if (flg1) {
        ierr = PetscAttachDebugger();CHKERRQ(ierr);
      } else {
        ierr = PetscStopForDebugger();CHKERRQ(ierr);
      }
      ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
      ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
    }
    ierr = PetscFree(nodes);CHKERRQ(ierr);
  }

  /* NOTE(review): emacsmachinename is 256 bytes but only 128 are exposed here - harmless, just inconsistent */
  ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
  if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}

#if defined(PETSC_USE_SOCKET_VIEWER)
  /*
    Activates new sockets for zope if needed
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
  if (flgz){
    int  sockfd;
    char hostname[256];
    char username[256];
    int  remoteport = 9999;

    ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
    if (!hostname[0]){
      ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
    }
    ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
    ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
    PETSC_ZOPEFD = fdopen(sockfd, "w");
    if (flgzout){
      PETSC_STDOUT = PETSC_ZOPEFD;
      fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
      fprintf(PETSC_STDOUT, "<<<start>>>");
    } else {
      fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
      fprintf(PETSC_ZOPEFD, "<<<start>>>");
    }
  }
#endif
#if defined(PETSC_USE_SERVER)
  ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
  if (flgz){
    PetscInt port = PETSC_DECIDE;
    ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
  }
#endif

  /*
      Setup profiling and logging
  */
#if defined (PETSC_USE_INFO)
  {
    char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
    ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
    if (flg1 && logname[0]) {
      ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
    } else if (flg1) {
      ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
    }
  }
#endif
#if defined(PETSC_USE_LOG)
  mname[0] = 0;
  ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
  if (flg1) {
    if (mname[0]) {
      ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
    } else {
      ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_HAVE_MPE)
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
  if (flg1) PetscLogMPEBegin();
#endif
  flg1 = PETSC_FALSE;
  flg2 = PETSC_FALSE;
  flg3 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
  if (flg1)                      { ierr = PetscLogAllBegin();CHKERRQ(ierr); }
  else if (flg2 || flg3 || flg4) { ierr = PetscLogBegin();CHKERRQ(ierr);}

  ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
  if (flg1) {
    char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
    FILE *file;
    if (mname[0]) {
      sprintf(name,"%s.%d",mname,rank);
      ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
      file = fopen(fname,"w");
      if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
    } else {
      file = PETSC_STDOUT;
    }
    ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
  }
#endif

  /*
      Setup building of stack frames for all function calls
  */
#if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
  ierr = PetscStackCreate();CHKERRQ(ierr);
#endif

  ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);

  /*
       Print basic help message
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
  if (flg1) {
    ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," only when run in the debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," start the debugger in new xterm\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," unless noxterm is given\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," start all processes in the debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," emacs jumps to error file\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," waits the delay for you to attach\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
#if defined(PETSC_USE_LOG)
    ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
#if defined(PETSC_HAVE_MPE)
    ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
#endif
    ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
#endif
    ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
  }

  ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
  if (flg1) {
    ierr = PetscSleep(si);CHKERRQ(ierr);
  }

  ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
  ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
  if (f) {
    ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
  }

#if defined(PETSC_HAVE_CUSP)
  /* synchronize CUSP when -log_summary is on (for accurate timings) or when explicitly requested */
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
  if (flg3) flg1 = PETSC_TRUE;
  else flg1      = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) synchronizeCUSP = PETSC_TRUE;
#endif

  PetscFunctionReturn(0);
}

/**** 'Tree' Thread Pool Functions ****/
/* Worker body for the 'tree' pool: each thread pins itself to its assigned core,
   waits on subordinates arranged in a binary tree (Mary = 2 children per node),
   and thread 0 (the root) signals the main thread when a job completes. */
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;            /* PETSC_TRUE when this thread is a leaf (no subordinates) */
  cpu_set_t mset;
  //printf("Thread %d In Tree Thread Function\n",ThreadId);
  /* pin this thread to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    //check your subordinates, wait for them to be ready
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          //upon entry, atomically releases the lock and blocks
          //upon return, has the lock
          ierr =
pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 808 } 809 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 810 } 811 } 812 //your subordinates are now ready 813 } 814 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 815 //update your ready status 816 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 817 if(ThreadId==0) { 818 job_tree.eJobStat = JobCompleted; 819 //signal main 820 ierr = pthread_cond_signal(&main_cond); 821 } 822 else { 823 //tell your boss that you're ready to work 824 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 825 } 826 //the while loop needs to have an exit 827 //the 'main' thread can terminate all the threads by performing a broadcast 828 //and calling FuncFinish 829 while(PetscThreadGo) { 830 //need to check the condition to ensure we don't have to wait 831 //waiting when you don't have to causes problems 832 //also need to check the condition to ensure proper handling of spurious wakeups 833 while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) { 834 //upon entry, automically releases the lock and blocks 835 //upon return, has the lock 836 ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]); 837 *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE; 838 *(job_tree.arrThreadReady[ThreadId]) = PETSC_FALSE; 839 } 840 if(ThreadId==0) { 841 job_tree.startJob = PETSC_FALSE; 842 job_tree.eJobStat = ThreadsWorking; 843 } 844 ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]); 845 if(PeeOn==PETSC_FALSE) { 846 //tell your subordinates it's time to get to work 847 for(i=1; i<=Mary; i++) { 848 SubWorker = Mary*ThreadId+i; 849 if(SubWorker<PetscMaxThreads) { 850 ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]); 851 } 852 } 853 } 854 //do your job 855 if(job_tree.pdata==NULL) { 856 iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata); 857 } 858 else { 859 iterr = (PetscErrorCode)(long 
int)job_tree.pfunc(job_tree.pdata[ThreadId]); 860 } 861 if(iterr!=0) { 862 ithreaderr = 1; 863 } 864 if(PetscThreadGo) { 865 //reset job, get ready for more 866 if(PeeOn==PETSC_FALSE) { 867 //check your subordinates, waiting for them to be ready 868 //how do you know for a fact that a given subordinate has actually started? 869 for(i=1;i<=Mary;i++) { 870 SubWorker = Mary*ThreadId+i; 871 if(SubWorker<PetscMaxThreads) { 872 ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]); 873 while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) { 874 //upon entry, automically releases the lock and blocks 875 //upon return, has the lock 876 ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 877 } 878 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 879 } 880 } 881 //your subordinates are now ready 882 } 883 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 884 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 885 if(ThreadId==0) { 886 job_tree.eJobStat = JobCompleted; //root thread: last thread to complete, guaranteed! 
887 //root thread signals 'main' 888 ierr = pthread_cond_signal(&main_cond); 889 } 890 else { 891 //signal your boss before you go to sleep 892 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 893 } 894 } 895 } 896 return NULL; 897 } 898 899 #undef __FUNCT__ 900 #define __FUNCT__ "PetscThreadInitialize_Tree" 901 void* PetscThreadInitialize_Tree(PetscInt N) { 902 PetscInt i,ierr; 903 int status; 904 905 if(PetscUseThreadPool) { 906 size_t Val1 = (size_t)CACHE_LINE_SIZE; 907 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 908 arrmutex = (char*)memalign(Val1,Val2); 909 arrcond1 = (char*)memalign(Val1,Val2); 910 arrcond2 = (char*)memalign(Val1,Val2); 911 arrstart = (char*)memalign(Val1,Val2); 912 arrready = (char*)memalign(Val1,Val2); 913 job_tree.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 914 job_tree.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 915 job_tree.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 916 job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 917 job_tree.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 918 //initialize job structure 919 for(i=0; i<PetscMaxThreads; i++) { 920 job_tree.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 921 job_tree.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 922 job_tree.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 923 job_tree.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i); 924 job_tree.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 925 } 926 for(i=0; i<PetscMaxThreads; i++) { 927 ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL); 928 ierr = pthread_cond_init(job_tree.cond1array[i],NULL); 929 ierr = pthread_cond_init(job_tree.cond2array[i],NULL); 930 *(job_tree.arrThreadStarted[i]) = PETSC_FALSE; 931 *(job_tree.arrThreadReady[i]) = PETSC_FALSE; 932 } 933 
job_tree.pfunc = NULL; 934 job_tree.pdata = (void**)malloc(N*sizeof(void*)); 935 job_tree.startJob = PETSC_FALSE; 936 job_tree.eJobStat = JobInitiated; 937 pVal = (int*)malloc(N*sizeof(int)); 938 //allocate memory in the heap for the thread structure 939 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 940 //create threads 941 for(i=0; i<N; i++) { 942 pVal[i] = i; 943 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 944 //error check 945 } 946 } 947 else { 948 //do nothing 949 } 950 return NULL; 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "PetscThreadFinalize_Tree" 955 PetscErrorCode PetscThreadFinalize_Tree() { 956 int i,ierr; 957 void* jstatus; 958 959 PetscFunctionBegin; 960 961 if(PetscUseThreadPool) { 962 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 963 //join the threads 964 for(i=0; i<PetscMaxThreads; i++) { 965 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 966 //do error checking 967 } 968 free(PetscThreadPoint); 969 free(arrmutex); 970 free(arrcond1); 971 free(arrcond2); 972 free(arrstart); 973 free(arrready); 974 free(job_tree.pdata); 975 free(pVal); 976 } 977 else { 978 } 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "MainWait_Tree" 984 void MainWait_Tree() { 985 int ierr; 986 ierr = pthread_mutex_lock(job_tree.mutexarray[0]); 987 while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) { 988 ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]); 989 } 990 ierr = pthread_mutex_unlock(job_tree.mutexarray[0]); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MainJob_Tree" 995 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) { 996 int i,ierr; 997 PetscErrorCode ijoberr = 0; 998 if(PetscUseThreadPool) { 999 MainWait(); 1000 job_tree.pfunc = pFunc; 1001 job_tree.pdata = data; 1002 job_tree.startJob = PETSC_TRUE; 1003 for(i=0; i<PetscMaxThreads; i++) { 1004 *(job_tree.arrThreadStarted[i]) = 
PETSC_FALSE; 1005 } 1006 job_tree.eJobStat = JobInitiated; 1007 ierr = pthread_cond_signal(job_tree.cond2array[0]); 1008 if(pFunc!=FuncFinish) { 1009 MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any) 1010 } 1011 } 1012 else { 1013 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1014 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1015 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1016 free(apThread); 1017 } 1018 if(ithreaderr) { 1019 ijoberr = ithreaderr; 1020 } 1021 return ijoberr; 1022 } 1023 /**** ****/ 1024 1025 /**** 'Main' Thread Pool Functions ****/ 1026 void* PetscThreadFunc_Main(void* arg) { 1027 PetscErrorCode iterr; 1028 int icorr,ierr; 1029 int* pId = (int*)arg; 1030 int ThreadId = *pId; 1031 cpu_set_t mset; 1032 //printf("Thread %d In Main Thread Function\n",ThreadId); 1033 icorr = ThreadCoreAffinity[ThreadId]; 1034 CPU_ZERO(&mset); 1035 CPU_SET(icorr,&mset); 1036 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1037 1038 ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]); 1039 //update your ready status 1040 *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE; 1041 //tell the BOSS that you're ready to work before you go to sleep 1042 ierr = pthread_cond_signal(job_main.cond1array[ThreadId]); 1043 1044 //the while loop needs to have an exit 1045 //the 'main' thread can terminate all the threads by performing a broadcast 1046 //and calling FuncFinish 1047 while(PetscThreadGo) { 1048 //need to check the condition to ensure we don't have to wait 1049 //waiting when you don't have to causes problems 1050 //also need to check the condition to ensure proper handling of spurious wakeups 1051 while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) { 1052 //upon entry, atomically releases the lock and blocks 1053 //upon return, has the lock 1054 ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]); 
1055 //*(job_main.arrThreadReady[ThreadId]) = PETSC_FALSE; 1056 } 1057 ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]); 1058 //do your job 1059 if(job_main.pdata==NULL) { 1060 iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata); 1061 } 1062 else { 1063 iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]); 1064 } 1065 if(iterr!=0) { 1066 ithreaderr = 1; 1067 } 1068 if(PetscThreadGo) { 1069 //reset job, get ready for more 1070 ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]); 1071 *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE; 1072 //tell the BOSS that you're ready to work before you go to sleep 1073 ierr = pthread_cond_signal(job_main.cond1array[ThreadId]); 1074 } 1075 } 1076 return NULL; 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "PetscThreadInitialize_Main" 1081 void* PetscThreadInitialize_Main(PetscInt N) { 1082 PetscInt i,ierr; 1083 int status; 1084 1085 if(PetscUseThreadPool) { 1086 size_t Val1 = (size_t)CACHE_LINE_SIZE; 1087 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 1088 arrmutex = (char*)memalign(Val1,Val2); 1089 arrcond1 = (char*)memalign(Val1,Val2); 1090 arrcond2 = (char*)memalign(Val1,Val2); 1091 arrstart = (char*)memalign(Val1,Val2); 1092 arrready = (char*)memalign(Val1,Val2); 1093 job_main.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 1094 job_main.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1095 job_main.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1096 job_main.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1097 //initialize job structure 1098 for(i=0; i<PetscMaxThreads; i++) { 1099 job_main.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 1100 job_main.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 1101 job_main.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 1102 job_main.arrThreadReady[i] 
= (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 1103 } 1104 for(i=0; i<PetscMaxThreads; i++) { 1105 ierr = pthread_mutex_init(job_main.mutexarray[i],NULL); 1106 ierr = pthread_cond_init(job_main.cond1array[i],NULL); 1107 ierr = pthread_cond_init(job_main.cond2array[i],NULL); 1108 *(job_main.arrThreadReady[i]) = PETSC_FALSE; 1109 } 1110 job_main.pfunc = NULL; 1111 job_main.pdata = (void**)malloc(N*sizeof(void*)); 1112 pVal = (int*)malloc(N*sizeof(int)); 1113 //allocate memory in the heap for the thread structure 1114 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1115 //create threads 1116 for(i=0; i<N; i++) { 1117 pVal[i] = i; 1118 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1119 //error check 1120 } 1121 } 1122 else { 1123 } 1124 return NULL; 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "PetscThreadFinalize_Main" 1129 PetscErrorCode PetscThreadFinalize_Main() { 1130 int i,ierr; 1131 void* jstatus; 1132 1133 PetscFunctionBegin; 1134 1135 if(PetscUseThreadPool) { 1136 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 1137 //join the threads 1138 for(i=0; i<PetscMaxThreads; i++) { 1139 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1140 //do error checking 1141 } 1142 free(PetscThreadPoint); 1143 free(arrmutex); 1144 free(arrcond1); 1145 free(arrcond2); 1146 free(arrstart); 1147 free(arrready); 1148 free(job_main.pdata); 1149 free(pVal); 1150 } 1151 else { 1152 } 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MainWait_Main" 1158 void MainWait_Main() { 1159 int i,ierr; 1160 for(i=0; i<PetscMaxThreads; i++) { 1161 ierr = pthread_mutex_lock(job_main.mutexarray[i]); 1162 while(*(job_main.arrThreadReady[i])==PETSC_FALSE) { 1163 ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]); 1164 } 1165 ierr = pthread_mutex_unlock(job_main.mutexarray[i]); 1166 } 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MainJob_Main" 1171 
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) { 1172 int i,ierr; 1173 PetscErrorCode ijoberr = 0; 1174 if(PetscUseThreadPool) { 1175 MainWait(); //you know everyone is waiting to be signalled! 1176 job_main.pfunc = pFunc; 1177 job_main.pdata = data; 1178 for(i=0; i<PetscMaxThreads; i++) { 1179 *(job_main.arrThreadReady[i]) = PETSC_FALSE; //why do this? suppose you get into MainWait first 1180 } 1181 //tell the threads to go to work 1182 for(i=0; i<PetscMaxThreads; i++) { 1183 ierr = pthread_cond_signal(job_main.cond2array[i]); 1184 } 1185 if(pFunc!=FuncFinish) { 1186 MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any) 1187 } 1188 } 1189 else { 1190 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1191 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1192 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1193 free(apThread); 1194 } 1195 if(ithreaderr) { 1196 ijoberr = ithreaderr; 1197 } 1198 return ijoberr; 1199 } 1200 /**** ****/ 1201 1202 /**** Chain Thread Functions ****/ 1203 void* PetscThreadFunc_Chain(void* arg) { 1204 PetscErrorCode iterr; 1205 int icorr,ierr; 1206 int* pId = (int*)arg; 1207 int ThreadId = *pId; 1208 int SubWorker = ThreadId + 1; 1209 PetscBool PeeOn; 1210 cpu_set_t mset; 1211 //printf("Thread %d In Chain Thread Function\n",ThreadId); 1212 icorr = ThreadCoreAffinity[ThreadId]; 1213 CPU_ZERO(&mset); 1214 CPU_SET(icorr,&mset); 1215 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1216 1217 if(ThreadId==(PetscMaxThreads-1)) { 1218 PeeOn = PETSC_TRUE; 1219 } 1220 else { 1221 PeeOn = PETSC_FALSE; 1222 } 1223 if(PeeOn==PETSC_FALSE) { 1224 //check your subordinate, wait for him to be ready 1225 ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]); 1226 while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) { 1227 //upon entry, automically releases the lock and blocks 1228 //upon return, has 
the lock 1229 ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]); 1230 } 1231 ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]); 1232 //your subordinate is now ready 1233 } 1234 ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]); 1235 //update your ready status 1236 *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE; 1237 if(ThreadId==0) { 1238 job_chain.eJobStat = JobCompleted; 1239 //signal main 1240 ierr = pthread_cond_signal(&main_cond); 1241 } 1242 else { 1243 //tell your boss that you're ready to work 1244 ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]); 1245 } 1246 //the while loop needs to have an exit 1247 //the 'main' thread can terminate all the threads by performing a broadcast 1248 //and calling FuncFinish 1249 while(PetscThreadGo) { 1250 //need to check the condition to ensure we don't have to wait 1251 //waiting when you don't have to causes problems 1252 //also need to check the condition to ensure proper handling of spurious wakeups 1253 while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) { 1254 //upon entry, automically releases the lock and blocks 1255 //upon return, has the lock 1256 ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]); 1257 *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE; 1258 *(job_chain.arrThreadReady[ThreadId]) = PETSC_FALSE; 1259 } 1260 if(ThreadId==0) { 1261 job_chain.startJob = PETSC_FALSE; 1262 job_chain.eJobStat = ThreadsWorking; 1263 } 1264 ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]); 1265 if(PeeOn==PETSC_FALSE) { 1266 //tell your subworker it's time to get to work 1267 ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]); 1268 } 1269 //do your job 1270 if(job_chain.pdata==NULL) { 1271 iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata); 1272 } 1273 else { 1274 iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]); 1275 } 1276 if(iterr!=0) { 1277 
ithreaderr = 1; 1278 } 1279 if(PetscThreadGo) { 1280 //reset job, get ready for more 1281 if(PeeOn==PETSC_FALSE) { 1282 //check your subordinate, wait for him to be ready 1283 //how do you know for a fact that your subordinate has actually started? 1284 ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]); 1285 while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) { 1286 //upon entry, automically releases the lock and blocks 1287 //upon return, has the lock 1288 ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]); 1289 } 1290 ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]); 1291 //your subordinate is now ready 1292 } 1293 ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]); 1294 *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE; 1295 if(ThreadId==0) { 1296 job_chain.eJobStat = JobCompleted; //foreman: last thread to complete, guaranteed! 1297 //root thread (foreman) signals 'main' 1298 ierr = pthread_cond_signal(&main_cond); 1299 } 1300 else { 1301 //signal your boss before you go to sleep 1302 ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]); 1303 } 1304 } 1305 } 1306 return NULL; 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "PetscThreadInitialize_Chain" 1311 void* PetscThreadInitialize_Chain(PetscInt N) { 1312 PetscInt i,ierr; 1313 int status; 1314 1315 if(PetscUseThreadPool) { 1316 size_t Val1 = (size_t)CACHE_LINE_SIZE; 1317 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 1318 arrmutex = (char*)memalign(Val1,Val2); 1319 arrcond1 = (char*)memalign(Val1,Val2); 1320 arrcond2 = (char*)memalign(Val1,Val2); 1321 arrstart = (char*)memalign(Val1,Val2); 1322 arrready = (char*)memalign(Val1,Val2); 1323 job_chain.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 1324 job_chain.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1325 job_chain.cond2array = 
(pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1326 job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1327 job_chain.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1328 //initialize job structure 1329 for(i=0; i<PetscMaxThreads; i++) { 1330 job_chain.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 1331 job_chain.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 1332 job_chain.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 1333 job_chain.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i); 1334 job_chain.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 1335 } 1336 for(i=0; i<PetscMaxThreads; i++) { 1337 ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL); 1338 ierr = pthread_cond_init(job_chain.cond1array[i],NULL); 1339 ierr = pthread_cond_init(job_chain.cond2array[i],NULL); 1340 *(job_chain.arrThreadStarted[i]) = PETSC_FALSE; 1341 *(job_chain.arrThreadReady[i]) = PETSC_FALSE; 1342 } 1343 job_chain.pfunc = NULL; 1344 job_chain.pdata = (void**)malloc(N*sizeof(void*)); 1345 job_chain.startJob = PETSC_FALSE; 1346 job_chain.eJobStat = JobInitiated; 1347 pVal = (int*)malloc(N*sizeof(int)); 1348 //allocate memory in the heap for the thread structure 1349 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1350 //create threads 1351 for(i=0; i<N; i++) { 1352 pVal[i] = i; 1353 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1354 //error check 1355 } 1356 } 1357 else { 1358 } 1359 return NULL; 1360 } 1361 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "PetscThreadFinalize_Chain" 1365 PetscErrorCode PetscThreadFinalize_Chain() { 1366 int i,ierr; 1367 void* jstatus; 1368 1369 PetscFunctionBegin; 1370 1371 if(PetscUseThreadPool) { 1372 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 1373 //join the threads 1374 for(i=0; i<PetscMaxThreads; i++) { 1375 ierr = 
pthread_join(PetscThreadPoint[i],&jstatus); 1376 //do error checking 1377 } 1378 free(PetscThreadPoint); 1379 free(arrmutex); 1380 free(arrcond1); 1381 free(arrcond2); 1382 free(arrstart); 1383 free(arrready); 1384 free(job_chain.pdata); 1385 free(pVal); 1386 } 1387 else { 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MainWait_Chain" 1394 void MainWait_Chain() { 1395 int ierr; 1396 ierr = pthread_mutex_lock(job_chain.mutexarray[0]); 1397 while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) { 1398 ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]); 1399 } 1400 ierr = pthread_mutex_unlock(job_chain.mutexarray[0]); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MainJob_Chain" 1405 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) { 1406 int i,ierr; 1407 PetscErrorCode ijoberr = 0; 1408 if(PetscUseThreadPool) { 1409 MainWait(); 1410 job_chain.pfunc = pFunc; 1411 job_chain.pdata = data; 1412 job_chain.startJob = PETSC_TRUE; 1413 for(i=0; i<PetscMaxThreads; i++) { 1414 *(job_chain.arrThreadStarted[i]) = PETSC_FALSE; 1415 } 1416 job_chain.eJobStat = JobInitiated; 1417 ierr = pthread_cond_signal(job_chain.cond2array[0]); 1418 if(pFunc!=FuncFinish) { 1419 MainWait(); //why wait after? 
guarantees that job gets done before proceeding with result collection (if any) 1420 } 1421 } 1422 else { 1423 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1424 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1425 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1426 free(apThread); 1427 } 1428 if(ithreaderr) { 1429 ijoberr = ithreaderr; 1430 } 1431 return ijoberr; 1432 } 1433 /**** ****/ 1434 1435 /**** True Thread Functions ****/ 1436 void* PetscThreadFunc_True(void* arg) { 1437 int icorr,ierr,iVal; 1438 int* pId = (int*)arg; 1439 int ThreadId = *pId; 1440 PetscErrorCode iterr; 1441 cpu_set_t mset; 1442 //printf("Thread %d In True Pool Thread Function\n",ThreadId); 1443 icorr = ThreadCoreAffinity[ThreadId]; 1444 CPU_ZERO(&mset); 1445 CPU_SET(icorr,&mset); 1446 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1447 1448 ierr = pthread_mutex_lock(&job_true.mutex); 1449 job_true.iNumReadyThreads++; 1450 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1451 ierr = pthread_cond_signal(&main_cond); 1452 } 1453 //the while loop needs to have an exit 1454 //the 'main' thread can terminate all the threads by performing a broadcast 1455 //and calling FuncFinish 1456 while(PetscThreadGo) { 1457 //need to check the condition to ensure we don't have to wait 1458 //waiting when you don't have to causes problems 1459 //also need to wait if another thread sneaks in and messes with the predicate 1460 while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) { 1461 //upon entry, automically releases the lock and blocks 1462 //upon return, has the lock 1463 ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex); 1464 } 1465 job_true.startJob = PETSC_FALSE; 1466 job_true.iNumJobThreads--; 1467 job_true.iNumReadyThreads--; 1468 iVal = PetscMaxThreads-job_true.iNumReadyThreads-1; 1469 pthread_mutex_unlock(&job_true.mutex); 1470 if(job_true.pdata==NULL) { 1471 iterr = (PetscErrorCode)(long 
int)job_true.pfunc(job_true.pdata); 1472 } 1473 else { 1474 iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]); 1475 } 1476 if(iterr!=0) { 1477 ithreaderr = 1; 1478 } 1479 //the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads 1480 //what happens if a thread finishes before they all start? BAD! 1481 //what happens if a thread finishes before any else start? BAD! 1482 pthread_barrier_wait(job_true.pbarr); //ensures all threads are finished 1483 //reset job 1484 if(PetscThreadGo) { 1485 pthread_mutex_lock(&job_true.mutex); 1486 job_true.iNumReadyThreads++; 1487 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1488 //signal the 'main' thread that the job is done! (only done once) 1489 ierr = pthread_cond_signal(&main_cond); 1490 } 1491 } 1492 } 1493 return NULL; 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "PetscThreadInitialize_True" 1498 void* PetscThreadInitialize_True(PetscInt N) { 1499 PetscInt i; 1500 int status; 1501 1502 if(PetscUseThreadPool) { 1503 pVal = (int*)malloc(N*sizeof(int)); 1504 //allocate memory in the heap for the thread structure 1505 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1506 BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); //BarrPoint[0] makes no sense, don't use it! 
1507 job_true.pdata = (void**)malloc(N*sizeof(void*)); 1508 for(i=0; i<N; i++) { 1509 pVal[i] = i; 1510 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1511 //error check to ensure proper thread creation 1512 status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1); 1513 //error check 1514 } 1515 } 1516 else { 1517 } 1518 return NULL; 1519 } 1520 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "PetscThreadFinalize_True" 1524 PetscErrorCode PetscThreadFinalize_True() { 1525 int i,ierr; 1526 void* jstatus; 1527 1528 PetscFunctionBegin; 1529 1530 if(PetscUseThreadPool) { 1531 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 1532 //join the threads 1533 for(i=0; i<PetscMaxThreads; i++) { 1534 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1535 //do error checking 1536 } 1537 free(BarrPoint); 1538 free(PetscThreadPoint); 1539 } 1540 else { 1541 } 1542 PetscFunctionReturn(0); 1543 } 1544 1545 #undef __FUNCT__ 1546 #define __FUNCT__ "MainWait_True" 1547 void MainWait_True() { 1548 int ierr; 1549 while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) { 1550 ierr = pthread_cond_wait(&main_cond,&job_true.mutex); 1551 } 1552 ierr = pthread_mutex_unlock(&job_true.mutex); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MainJob_True" 1557 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) { 1558 int ierr; 1559 PetscErrorCode ijoberr = 0; 1560 if(PetscUseThreadPool) { 1561 MainWait(); 1562 job_true.pfunc = pFunc; 1563 job_true.pdata = data; 1564 job_true.pbarr = &BarrPoint[n]; 1565 job_true.iNumJobThreads = n; 1566 job_true.startJob = PETSC_TRUE; 1567 ierr = pthread_cond_broadcast(&job_true.cond); 1568 if(pFunc!=FuncFinish) { 1569 MainWait(); //why wait after? 
guarantees that job gets done 1570 } 1571 } 1572 else { 1573 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1574 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1575 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1576 free(apThread); 1577 } 1578 if(ithreaderr) { 1579 ijoberr = ithreaderr; 1580 } 1581 return ijoberr; 1582 } 1583 /**** ****/ 1584 1585 void* FuncFinish(void* arg) { 1586 PetscThreadGo = PETSC_FALSE; 1587 return(0); 1588 } 1589