/*

   This file defines part of the initialization of PETSc

   This file uses regular malloc and free because it cannot know
   what malloc is being used until it has already processed the input.
*/

#define _GNU_SOURCE   /* exposes GNU extensions (e.g. sched_setaffinity/CPU_SET) from <sched.h> */
#include <sched.h>
#include <petscsys.h>        /*I  "petscsys.h"   I*/
#if defined(PETSC_USE_PTHREAD)
#include <pthread.h>
#endif
#if defined(PETSC_HAVE_SYS_SYSINFO_H)
#include <sys/sysinfo.h>
#endif
#include <unistd.h>
#if defined(PETSC_HAVE_STDLIB_H)
#include <stdlib.h>
#endif
#if defined(PETSC_HAVE_MALLOC_H)
#include <malloc.h>
#endif
#if defined(PETSC_HAVE_VALGRIND)
#include <valgrind/valgrind.h>
#endif

/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool   PetscBeganMPI         = PETSC_FALSE;
PetscBool   PetscInitializeCalled = PETSC_FALSE;
PetscBool   PetscFinalizeCalled   = PETSC_FALSE;
PetscBool   PetscUseThreadPool    = PETSC_FALSE;  /* set when -use_thread_pool is given */
PetscBool   PetscThreadGo         = PETSC_TRUE;   /* cleared to make pool worker threads exit their loops */
PetscMPIInt PetscGlobalRank       = -1;
PetscMPIInt PetscGlobalSize       = -1;

#if defined(PETSC_USE_PTHREAD_CLASSES)
PetscMPIInt PetscMaxThreads = 2;   /* number of worker threads; overridden by -thread_max */
pthread_t*  PetscThreadPoint;
/* NOTE(review): PETSC_HAVE_PTHREAD_BARRIER is defined unconditionally right here, so the
   #if tests on it below are always true -- presumably a stand-in for a configure-time
   test; confirm against the build system. */
#define PETSC_HAVE_PTHREAD_BARRIER
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode ithreaderr = 0;  /* set nonzero by a worker whose job function returned nonzero */
int*           pVal;

#define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
int* ThreadCoreAffinity;    /* core id per worker thread; filled from -thread<i> options */

typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  /* used by 'chain','tree' thread pool */

/* Shared state for the 'tree' pool: per-thread mutex/condition pointers plus the
   current job (function + per-thread data) and per-thread started/ready flags. */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* Shared state for the 'main' pool (no job-status bookkeeping). */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* Shared state for the 'chain' pool (same layout as sjob_tree). */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/* Shared state for the 'true' pool: a single mutex/cond pair plus a pthread barrier. */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER;  /* used by 'true', 'chain','tree' thread pools */
/* Raw byte arrays backing the per-thread synchronization objects; presumably carved
   into CACHE_LINE_SIZE-spaced slots (see above) -- allocation is not visible in this file chunk. */
char* arrmutex;  /* used by 'chain','main','tree' thread pools */
char* arrcond1;  /* used by 'chain','main','tree' thread pools */
char* arrcond2;  /* used by 'chain','main','tree' thread pools */
char* arrstart;  /* used by 'chain','main','tree' thread pools */
char* arrready;  /* used by 'chain','main','tree' thread pools */

/* Function Pointers: bound to one pool implementation (Tree/Main/Chain/True)
   by PetscOptionsCheckInitial_Private() based on -use_thread_pool. */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void*          PetscThreadFunc_Tree(void*);
void*          PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void           MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void*          PetscThreadFunc_Main(void*);
void*          PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void           MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void*          PetscThreadFunc_Chain(void*);
void*          PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void           MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void*          PetscThreadFunc_True(void*);
void*          PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void           MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** ****/

void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif

#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
MPI_Datatype MPI_C_DOUBLE_COMPLEX;
MPI_Datatype MPI_C_COMPLEX;
#endif
PetscScalar PETSC_i;
#else
PetscScalar PETSC_i = 0.0;
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype MPIU___FLOAT128 = 0;
#endif
MPI_Datatype MPIU_2SCALAR = 0;
MPI_Datatype MPIU_2INT = 0;

/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
char     _BT_mask = ' ';
char     _BT_c    = ' ';
PetscInt _BT_idx  = 0;

/*
   Function that is called to display all error messages
*/
PetscErrorCode (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
PetscErrorCode (*PetscHelpPrintf)(MPI_Comm,const char [],...)
= PetscHelpPrintfDefault; 179 #if defined(PETSC_HAVE_MATLAB_ENGINE) 180 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintf_Matlab; 181 #else 182 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintfDefault; 183 #endif 184 /* 185 This is needed to turn on/off cusp synchronization */ 186 PetscBool synchronizeCUSP = PETSC_FALSE; 187 188 /* ------------------------------------------------------------------------------*/ 189 /* 190 Optional file where all PETSc output from various prints is saved 191 */ 192 FILE *petsc_history = PETSC_NULL; 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "PetscOpenHistoryFile" 196 PetscErrorCode PetscOpenHistoryFile(const char filename[],FILE **fd) 197 { 198 PetscErrorCode ierr; 199 PetscMPIInt rank,size; 200 char pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64]; 201 char version[256]; 202 203 PetscFunctionBegin; 204 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 205 if (!rank) { 206 char arch[10]; 207 int err; 208 PetscViewer viewer; 209 210 ierr = PetscGetArchType(arch,10);CHKERRQ(ierr); 211 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 212 ierr = PetscGetVersion(version,256);CHKERRQ(ierr); 213 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 214 if (filename) { 215 ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr); 216 } else { 217 ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr); 218 ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr); 219 ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr); 220 } 221 222 *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname); 223 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 224 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr); 225 ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr); 226 ierr = 
PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr); 227 ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr); 228 ierr = PetscOptionsView(viewer);CHKERRQ(ierr); 229 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 230 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 231 err = fflush(*fd); 232 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "PetscCloseHistoryFile" 239 PetscErrorCode PetscCloseHistoryFile(FILE **fd) 240 { 241 PetscErrorCode ierr; 242 PetscMPIInt rank; 243 char date[64]; 244 int err; 245 246 PetscFunctionBegin; 247 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 248 if (!rank) { 249 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 250 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 251 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr); 252 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 253 err = fflush(*fd); 254 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 255 err = fclose(*fd); 256 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file"); 257 } 258 PetscFunctionReturn(0); 259 } 260 261 /* ------------------------------------------------------------------------------*/ 262 263 /* 264 This is ugly and probably belongs somewhere else, but I want to 265 be able to put a true MPI abort error handler with command line args. 266 267 This is so MPI errors in the debugger will leave all the stack 268 frames. The default MP_Abort() cleans up and exits thus providing no useful information 269 in the debugger hence we call abort() instead of MPI_Abort(). 
*/

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/* MPI error-handler callback: prints the MPI error code and calls abort()
   (not MPI_Abort) so a debugger keeps all stack frames. */
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();
}

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/* MPI error-handler callback: prints the MPI error code and tries to attach the
   debugger; falls back to MPI_Abort() if the attach fails. */
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}

#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode  PetscEnd(void)
{
  PetscFunctionBegin;
  PetscFinalize();
  exit(0);      /* never returns; the return below only silences compilers */
  return 0;
}

PetscBool PetscOptionsPublish = PETSC_FALSE;
extern PetscErrorCode PetscSetUseTrMalloc_Private(void);
extern PetscBool petscsetmallocvisited;
/* machine name for the -on_error_emacs error handler; filled in
   PetscOptionsCheckInitial_Private() */
static char emacsmachinename[256];

/* Optional callbacks a higher-level package registers to print its own
   version/help text ahead of PETSc's (see PetscSetHelpVersionFunctions). */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;

#undef __FUNCT__
#define __FUNCT__ "PetscSetHelpVersionFunctions"
/*@C
   PetscSetHelpVersionFunctions - Sets functions that print help and version information
   before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
   This routine enables a "higher-level" package that uses PETSc to print its messages first.

   Input Parameter:
+  help - the help function (may be PETSC_NULL)
-  version - the version function (may be PETSC_NULL)

   Level: developer

   Concepts: package help message

@*/
PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
{
  PetscFunctionBegin;
  /* Stash the callbacks; PetscOptionsCheckInitial_Private() invokes them before
     PETSc prints its own version/help text. */
  PetscExternalHelpFunction    = help;
  PetscExternalVersionFunction = version;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscOptionsCheckInitial_Private"
/*
   PetscOptionsCheckInitial_Private - Processes the startup options database:
   malloc tracing/debugging, version and help output, error handlers, debugger
   attachment, zope/web-server sockets, logging/profiling, debug stack frames
   and (when enabled) pthread thread-pool selection.
*/
PetscErrorCode  PetscOptionsCheckInitial_Private(void)
{
  char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
  PetscErrorCode ierr;
  PetscReal      si;
  int            i;
  PetscMPIInt    rank;
  char           version[256];

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  /*
      Setup the memory management; support for tracing malloc() usage
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
  /* Debug builds: tracing malloc is on by default unless -malloc no was given */
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
  if ((!flg2 || flg1) && !petscsetmallocvisited) {
#if defined(PETSC_HAVE_VALGRIND)
    if (flg2 || !(RUNNING_ON_VALGRIND)) {
      /* turn off default -malloc if valgrind is being used */
#endif
      ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
#if defined(PETSC_HAVE_VALGRIND)
    }
#endif
  }
#else
  /* Optimized builds: tracing malloc only when explicitly requested */
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
  if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
#endif
  if (flg3) {
    ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
  }
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {
    ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
    ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
  }

  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (!flg1) {
    flg1 = PETSC_FALSE;
    ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
  }
  if (flg1) {
    ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
  }

  /*
      Set the display variable for graphics
  */
  ierr = PetscSetDisplay();CHKERRQ(ierr);


  /* NOTE(review): the block below is leftover commented-out thread-pool code;
     only the last line ("Print the PETSc version information") describes the
     live code that follows. */
  /*
      else {
     //need to define these in the case on 'no threads' or 'thread create/destroy'
     //could take any of the above versions
     PetscThreadInitialize = &PetscThreadInitialize_True;
     PetscThreadFinalize = &PetscThreadFinalize_True;
     MainJob = &MainJob_True;
   }
      Print the PETSc version information
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
  if (flg1 || flg2 || flg3){

    /*
       Print "higher-level" package version message
    */
    if (PetscExternalVersionFunction) {
      ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
    }

    ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
------------------------------\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
------------------------------\n");CHKERRQ(ierr);
  }

  /*
      Print "higher-level" package help message
  */
  if (flg3){
    if (PetscExternalHelpFunction) {
      ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
    }
  }

  /*
      Setup the error handling
  */
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {
    ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
  }
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}

  /*
      Setup debugger information
  */
  ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
  ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
  if (flg1) {
    MPI_Errhandler err_handler;

    ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
    ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
    ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
    ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
  }
  ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
  if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
  ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
  if (flg1 || flg2) {
    PetscMPIInt    size;
    PetscInt       lsize,*nodes;
    MPI_Errhandler err_handler;
    /*
       we have to make sure that all processors have opened
       connections to all other processors, otherwise once the
       debugger has stated it is likely to receive a SIGUSR1
       and kill the program.
    */
    ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
    if (size > 2) {
      PetscMPIInt dummy = 0;
      MPI_Status  status;
      for (i=0; i<size; i++) {
        if (rank != i) {
          ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
        }
      }
      for (i=0; i<size; i++) {
        if (rank != i) {
          ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
        }
      }
    }
    /* check if this processor node should be in debugger */
    ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
    lsize = size;
    ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
    if (flag) {
      /* flag is cleared when this rank IS listed, i.e. should enter the debugger */
      for (i=0; i<lsize; i++) {
        if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
      }
    }
    if (!flag) {
      ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
      ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
      if (flg1) {
        ierr = PetscAttachDebugger();CHKERRQ(ierr);
      } else {
        ierr = PetscStopForDebugger();CHKERRQ(ierr);
      }
539 ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr); 540 ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr); 541 } 542 ierr = PetscFree(nodes);CHKERRQ(ierr); 543 } 544 545 ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr); 546 if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);} 547 548 #if defined(PETSC_USE_SOCKET_VIEWER) 549 /* 550 Activates new sockets for zope if needed 551 */ 552 ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr); 553 ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr); 554 if (flgz){ 555 int sockfd; 556 char hostname[256]; 557 char username[256]; 558 int remoteport = 9999; 559 560 ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr); 561 if (!hostname[0]){ 562 ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr); 563 } 564 ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr); 565 ierr = PetscGetUserName(username, 256);CHKERRQ(ierr); 566 PETSC_ZOPEFD = fdopen(sockfd, "w"); 567 if (flgzout){ 568 PETSC_STDOUT = PETSC_ZOPEFD; 569 fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username); 570 fprintf(PETSC_STDOUT, "<<<start>>>"); 571 } else { 572 fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username); 573 fprintf(PETSC_ZOPEFD, "<<<start>>>"); 574 } 575 } 576 #endif 577 #if defined(PETSC_USE_SERVER) 578 ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr); 579 if (flgz){ 580 PetscInt port = PETSC_DECIDE; 581 ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr); 582 ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr); 583 } 584 #endif 585 586 /* 587 Setup profiling and logging 588 */ 589 #if defined (PETSC_USE_INFO) 590 { 591 char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0; 592 ierr = 
PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr); 593 if (flg1 && logname[0]) { 594 ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr); 595 } else if (flg1) { 596 ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr); 597 } 598 } 599 #endif 600 #if defined(PETSC_USE_LOG) 601 mname[0] = 0; 602 ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); 603 if (flg1) { 604 if (mname[0]) { 605 ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr); 606 } else { 607 ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr); 608 } 609 } 610 #if defined(PETSC_HAVE_MPE) 611 flg1 = PETSC_FALSE; 612 ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr); 613 if (flg1) PetscLogMPEBegin(); 614 #endif 615 flg1 = PETSC_FALSE; 616 flg2 = PETSC_FALSE; 617 flg3 = PETSC_FALSE; 618 ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr); 619 ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr); 620 ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr); 621 ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr); 622 if (flg1) { ierr = PetscLogAllBegin();CHKERRQ(ierr); } 623 else if (flg2 || flg3 || flg4) { ierr = PetscLogBegin();CHKERRQ(ierr);} 624 625 ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr); 626 if (flg1) { 627 char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN]; 628 FILE *file; 629 if (mname[0]) { 630 sprintf(name,"%s.%d",mname,rank); 631 ierr = PetscFixFilename(name,fname);CHKERRQ(ierr); 632 file = fopen(fname,"w"); 633 if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname); 634 } else { 635 file = PETSC_STDOUT; 636 } 637 ierr = PetscLogTraceBegin(file);CHKERRQ(ierr); 638 } 639 #endif 640 641 /* 642 Setup building of stack frames for all function calls 643 */ 644 #if defined(PETSC_USE_DEBUG) && 
!defined(PETSC_USE_PTHREAD) 645 ierr = PetscStackCreate();CHKERRQ(ierr); 646 #endif 647 648 ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr); 649 650 #if defined(PETSC_USE_PTHREAD_CLASSES) 651 /* 652 Determine whether user specified maximum number of threads 653 */ 654 ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr); 655 656 /* 657 Determine whether to use thread pool 658 */ 659 ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr); 660 if (flg1) { 661 PetscUseThreadPool = PETSC_TRUE; 662 PetscInt N_CORES = get_nprocs(); 663 ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int)); 664 char tstr[9]; 665 char tbuf[2]; 666 strcpy(tstr,"-thread"); 667 for(i=0;i<PetscMaxThreads;i++) { 668 ThreadCoreAffinity[i] = i; 669 sprintf(tbuf,"%d",i); 670 strcat(tstr,tbuf); 671 ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr); 672 if(flg1) { 673 ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr); 674 ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */ 675 } 676 tstr[7] = '\0'; 677 } 678 /* get the thread pool type */ 679 PetscInt ipool = 0; 680 const char *choices[4] = {"true","tree","main","chain"}; 681 682 ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr); 683 switch(ipool) { 684 case 1: 685 PetscThreadFunc = &PetscThreadFunc_Tree; 686 PetscThreadInitialize = &PetscThreadInitialize_Tree; 687 PetscThreadFinalize = &PetscThreadFinalize_Tree; 688 MainWait = &MainWait_Tree; 689 MainJob = &MainJob_Tree; 690 PetscInfo(PETSC_NULL,"Using tree thread pool\n"); 691 break; 692 case 2: 693 PetscThreadFunc = &PetscThreadFunc_Main; 694 PetscThreadInitialize = &PetscThreadInitialize_Main; 695 PetscThreadFinalize = &PetscThreadFinalize_Main; 696 MainWait = &MainWait_Main; 697 MainJob = &MainJob_Main; 698 PetscInfo(PETSC_NULL,"Using main thread 
pool\n"); 699 break; 700 #if defined(PETSC_HAVE_PTHREAD_BARRIER) 701 case 3: 702 #else 703 default: 704 #endif 705 PetscThreadFunc = &PetscThreadFunc_Chain; 706 PetscThreadInitialize = &PetscThreadInitialize_Chain; 707 PetscThreadFinalize = &PetscThreadFinalize_Chain; 708 MainWait = &MainWait_Chain; 709 MainJob = &MainJob_Chain; 710 PetscInfo(PETSC_NULL,"Using chain thread pool\n"); 711 break; 712 #if defined(PETSC_HAVE_PTHREAD_BARRIER) 713 default: 714 PetscThreadFunc = &PetscThreadFunc_True; 715 PetscThreadInitialize = &PetscThreadInitialize_True; 716 PetscThreadFinalize = &PetscThreadFinalize_True; 717 MainWait = &MainWait_True; 718 MainJob = &MainJob_True; 719 PetscInfo(PETSC_NULL,"Using true thread pool\n"); 720 break; 721 #endif 722 } 723 PetscThreadInitialize(PetscMaxThreads); 724 } 725 #endif 726 /* 727 Print basic help message 728 */ 729 ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr); 730 if (flg1) { 731 ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr); 732 ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr); 733 ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. 
Useful \n ");CHKERRQ(ierr); 734 ierr = (*PetscHelpPrintf)(comm," only when run in the debugger\n");CHKERRQ(ierr); 735 ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr); 736 ierr = (*PetscHelpPrintf)(comm," start the debugger in new xterm\n");CHKERRQ(ierr); 737 ierr = (*PetscHelpPrintf)(comm," unless noxterm is given\n");CHKERRQ(ierr); 738 ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr); 739 ierr = (*PetscHelpPrintf)(comm," start all processes in the debugger\n");CHKERRQ(ierr); 740 ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr); 741 ierr = (*PetscHelpPrintf)(comm," emacs jumps to error file\n");CHKERRQ(ierr); 742 ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr); 743 ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr); 744 ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr); 745 ierr = (*PetscHelpPrintf)(comm," waits the delay for you to attach\n");CHKERRQ(ierr); 746 ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr); 747 ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr); 748 ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr); 749 ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr); 750 ierr = (*PetscHelpPrintf)(comm," note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr); 751 ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr); 752 ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr); 753 ierr = 
(*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr); 754 ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr); 755 ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr); 756 ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr); 757 ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr); 758 ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr); 759 ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr); 760 ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr); 761 ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr); 762 ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr); 763 ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr); 764 #if defined(PETSC_USE_LOG) 765 ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr); 766 ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr); 767 ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr); 768 #if defined(PETSC_HAVE_MPE) 769 ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr); 770 #endif 771 ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr); 772 #endif 773 ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr); 774 ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr); 
775 ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr); 776 ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr); 777 } 778 779 ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr); 780 if (flg1) { 781 ierr = PetscSleep(si);CHKERRQ(ierr); 782 } 783 784 ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); 785 ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr); 786 if (f) { 787 ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr); 788 } 789 790 #if defined(PETSC_HAVE_CUSP) 791 ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr); 792 if (flg3) flg1 = PETSC_TRUE; 793 else flg1 = PETSC_FALSE; 794 ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr); 795 if (flg1) synchronizeCUSP = PETSC_TRUE; 796 #endif 797 798 PetscFunctionReturn(0); 799 } 800 801 #if defined(PETSC_USE_PTHREAD_CLASSES) 802 803 /**** 'Tree' Thread Pool Functions ****/ 804 void* PetscThreadFunc_Tree(void* arg) { 805 PetscErrorCode iterr; 806 int icorr,ierr; 807 int* pId = (int*)arg; 808 int ThreadId = *pId,Mary = 2,i,SubWorker; 809 PetscBool PeeOn; 810 cpu_set_t mset; 811 //printf("Thread %d In Tree Thread Function\n",ThreadId); 812 icorr = ThreadCoreAffinity[ThreadId]; 813 CPU_ZERO(&mset); 814 CPU_SET(icorr,&mset); 815 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 816 817 if((Mary*ThreadId+1)>(PetscMaxThreads-1)) { 818 PeeOn = PETSC_TRUE; 819 } 820 else { 821 PeeOn = PETSC_FALSE; 822 } 823 if(PeeOn==PETSC_FALSE) { 824 /* check your subordinates, wait for them to be ready */ 825 for(i=1;i<=Mary;i++) { 826 SubWorker = Mary*ThreadId+i; 827 if(SubWorker<PetscMaxThreads) { 828 ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]); 829 while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) { 830 /* upon entry, automically releases the lock and blocks 831 upon return, 
has the lock */ 832 ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 833 } 834 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 835 } 836 } 837 /* your subordinates are now ready */ 838 } 839 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 840 /* update your ready status */ 841 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 842 if(ThreadId==0) { 843 job_tree.eJobStat = JobCompleted; 844 /* ignal main */ 845 ierr = pthread_cond_signal(&main_cond); 846 } 847 else { 848 /* tell your boss that you're ready to work */ 849 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 850 } 851 /* the while loop needs to have an exit 852 the 'main' thread can terminate all the threads by performing a broadcast 853 and calling FuncFinish */ 854 while(PetscThreadGo) { 855 /*need to check the condition to ensure we don't have to wait 856 waiting when you don't have to causes problems 857 also need to check the condition to ensure proper handling of spurious wakeups */ 858 while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) { 859 /* upon entry, automically releases the lock and blocks 860 upon return, has the lock */ 861 ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]); 862 *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE; 863 *(job_tree.arrThreadReady[ThreadId]) = PETSC_FALSE; 864 } 865 if(ThreadId==0) { 866 job_tree.startJob = PETSC_FALSE; 867 job_tree.eJobStat = ThreadsWorking; 868 } 869 ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]); 870 if(PeeOn==PETSC_FALSE) { 871 /* tell your subordinates it's time to get to work */ 872 for(i=1; i<=Mary; i++) { 873 SubWorker = Mary*ThreadId+i; 874 if(SubWorker<PetscMaxThreads) { 875 ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]); 876 } 877 } 878 } 879 /* do your job */ 880 if(job_tree.pdata==NULL) { 881 iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata); 882 } 883 else { 884 iterr = 
(PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]); 885 } 886 if(iterr!=0) { 887 ithreaderr = 1; 888 } 889 if(PetscThreadGo) { 890 /* reset job, get ready for more */ 891 if(PeeOn==PETSC_FALSE) { 892 /* check your subordinates, waiting for them to be ready 893 how do you know for a fact that a given subordinate has actually started? */ 894 for(i=1;i<=Mary;i++) { 895 SubWorker = Mary*ThreadId+i; 896 if(SubWorker<PetscMaxThreads) { 897 ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]); 898 while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) { 899 /* upon entry, automically releases the lock and blocks 900 upon return, has the lock */ 901 ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 902 } 903 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 904 } 905 } 906 /* your subordinates are now ready */ 907 } 908 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 909 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 910 if(ThreadId==0) { 911 job_tree.eJobStat = JobCompleted; /* oot thread: last thread to complete, guaranteed! 
*/ 912 /* root thread signals 'main' */ 913 ierr = pthread_cond_signal(&main_cond); 914 } 915 else { 916 /* signal your boss before you go to sleep */ 917 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 918 } 919 } 920 } 921 return NULL; 922 } 923 924 #undef __FUNCT__ 925 #define __FUNCT__ "PetscThreadInitialize_Tree" 926 void* PetscThreadInitialize_Tree(PetscInt N) { 927 PetscInt i,ierr; 928 int status; 929 930 if(PetscUseThreadPool) { 931 size_t Val1 = (size_t)CACHE_LINE_SIZE; 932 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 933 arrmutex = (char*)memalign(Val1,Val2); 934 arrcond1 = (char*)memalign(Val1,Val2); 935 arrcond2 = (char*)memalign(Val1,Val2); 936 arrstart = (char*)memalign(Val1,Val2); 937 arrready = (char*)memalign(Val1,Val2); 938 job_tree.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 939 job_tree.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 940 job_tree.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 941 job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 942 job_tree.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 943 /* initialize job structure */ 944 for(i=0; i<PetscMaxThreads; i++) { 945 job_tree.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 946 job_tree.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 947 job_tree.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 948 job_tree.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i); 949 job_tree.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 950 } 951 for(i=0; i<PetscMaxThreads; i++) { 952 ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL); 953 ierr = pthread_cond_init(job_tree.cond1array[i],NULL); 954 ierr = pthread_cond_init(job_tree.cond2array[i],NULL); 955 *(job_tree.arrThreadStarted[i]) = PETSC_FALSE; 956 *(job_tree.arrThreadReady[i]) = 
PETSC_FALSE; 957 } 958 job_tree.pfunc = NULL; 959 job_tree.pdata = (void**)malloc(N*sizeof(void*)); 960 job_tree.startJob = PETSC_FALSE; 961 job_tree.eJobStat = JobInitiated; 962 pVal = (int*)malloc(N*sizeof(int)); 963 /* allocate memory in the heap for the thread structure */ 964 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 965 /* create threads */ 966 for(i=0; i<N; i++) { 967 pVal[i] = i; 968 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 969 /* should check status */ 970 } 971 } 972 return NULL; 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "PetscThreadFinalize_Tree" 977 PetscErrorCode PetscThreadFinalize_Tree() { 978 int i,ierr; 979 void* jstatus; 980 981 PetscFunctionBegin; 982 983 if(PetscUseThreadPool) { 984 MainJob(FuncFinish,NULL,PetscMaxThreads); /* set up job and broadcast work */ 985 /* join the threads */ 986 for(i=0; i<PetscMaxThreads; i++) { 987 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 988 /* do error checking*/ 989 } 990 free(PetscThreadPoint); 991 free(arrmutex); 992 free(arrcond1); 993 free(arrcond2); 994 free(arrstart); 995 free(arrready); 996 free(job_tree.pdata); 997 free(pVal); 998 } 999 else { 1000 } 1001 PetscFunctionReturn(0); 1002 } 1003 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MainWait_Tree" 1006 void MainWait_Tree() { 1007 int ierr; 1008 ierr = pthread_mutex_lock(job_tree.mutexarray[0]); 1009 while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) { 1010 ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]); 1011 } 1012 ierr = pthread_mutex_unlock(job_tree.mutexarray[0]); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MainJob_Tree" 1017 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) { 1018 int i,ierr; 1019 PetscErrorCode ijoberr = 0; 1020 if(PetscUseThreadPool) { 1021 MainWait(); 1022 job_tree.pfunc = pFunc; 1023 job_tree.pdata = data; 1024 job_tree.startJob = PETSC_TRUE; 1025 for(i=0; i<PetscMaxThreads; i++) { 1026 
*(job_tree.arrThreadStarted[i]) = PETSC_FALSE; 1027 } 1028 job_tree.eJobStat = JobInitiated; 1029 ierr = pthread_cond_signal(job_tree.cond2array[0]); 1030 if(pFunc!=FuncFinish) { 1031 MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */ 1032 } 1033 } 1034 else { 1035 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1036 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1037 PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */ 1038 free(apThread); 1039 } 1040 if(ithreaderr) { 1041 ijoberr = ithreaderr; 1042 } 1043 return ijoberr; 1044 } 1045 /**** ****/ 1046 1047 /**** 'Main' Thread Pool Functions ****/ 1048 void* PetscThreadFunc_Main(void* arg) { 1049 PetscErrorCode iterr; 1050 int icorr,ierr; 1051 int* pId = (int*)arg; 1052 int ThreadId = *pId; 1053 cpu_set_t mset; 1054 //printf("Thread %d In Main Thread Function\n",ThreadId); 1055 icorr = ThreadCoreAffinity[ThreadId]; 1056 CPU_ZERO(&mset); 1057 CPU_SET(icorr,&mset); 1058 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1059 1060 ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]); 1061 /* update your ready status */ 1062 *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE; 1063 /* tell the BOSS that you're ready to work before you go to sleep */ 1064 ierr = pthread_cond_signal(job_main.cond1array[ThreadId]); 1065 1066 /* the while loop needs to have an exit 1067 the 'main' thread can terminate all the threads by performing a broadcast 1068 and calling FuncFinish */ 1069 while(PetscThreadGo) { 1070 /* need to check the condition to ensure we don't have to wait 1071 waiting when you don't have to causes problems 1072 also need to check the condition to ensure proper handling of spurious wakeups */ 1073 while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) { 1074 /* upon entry, atomically releases the lock and blocks 1075 upon return, has the lock */ 1076 ierr = 
pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]); 1077 /* (job_main.arrThreadReady[ThreadId]) = PETSC_FALSE; */ 1078 } 1079 ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]); 1080 if(job_main.pdata==NULL) { 1081 iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata); 1082 } 1083 else { 1084 iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]); 1085 } 1086 if(iterr!=0) { 1087 ithreaderr = 1; 1088 } 1089 if(PetscThreadGo) { 1090 /* reset job, get ready for more */ 1091 ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]); 1092 *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE; 1093 /* tell the BOSS that you're ready to work before you go to sleep */ 1094 ierr = pthread_cond_signal(job_main.cond1array[ThreadId]); 1095 } 1096 } 1097 return NULL; 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "PetscThreadInitialize_Main" 1102 void* PetscThreadInitialize_Main(PetscInt N) { 1103 PetscInt i,ierr; 1104 int status; 1105 1106 if(PetscUseThreadPool) { 1107 size_t Val1 = (size_t)CACHE_LINE_SIZE; 1108 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 1109 arrmutex = (char*)memalign(Val1,Val2); 1110 arrcond1 = (char*)memalign(Val1,Val2); 1111 arrcond2 = (char*)memalign(Val1,Val2); 1112 arrstart = (char*)memalign(Val1,Val2); 1113 arrready = (char*)memalign(Val1,Val2); 1114 job_main.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 1115 job_main.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1116 job_main.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1117 job_main.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1118 /* initialize job structure */ 1119 for(i=0; i<PetscMaxThreads; i++) { 1120 job_main.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 1121 job_main.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 1122 job_main.cond2array[i] = 
(pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 1123 job_main.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 1124 } 1125 for(i=0; i<PetscMaxThreads; i++) { 1126 ierr = pthread_mutex_init(job_main.mutexarray[i],NULL); 1127 ierr = pthread_cond_init(job_main.cond1array[i],NULL); 1128 ierr = pthread_cond_init(job_main.cond2array[i],NULL); 1129 *(job_main.arrThreadReady[i]) = PETSC_FALSE; 1130 } 1131 job_main.pfunc = NULL; 1132 job_main.pdata = (void**)malloc(N*sizeof(void*)); 1133 pVal = (int*)malloc(N*sizeof(int)); 1134 /* allocate memory in the heap for the thread structure */ 1135 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1136 /* create threads */ 1137 for(i=0; i<N; i++) { 1138 pVal[i] = i; 1139 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1140 /* error check */ 1141 } 1142 } 1143 else { 1144 } 1145 return NULL; 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "PetscThreadFinalize_Main" 1150 PetscErrorCode PetscThreadFinalize_Main() { 1151 int i,ierr; 1152 void* jstatus; 1153 1154 PetscFunctionBegin; 1155 1156 if(PetscUseThreadPool) { 1157 MainJob(FuncFinish,NULL,PetscMaxThreads); /* set up job and broadcast work */ 1158 /* join the threads */ 1159 for(i=0; i<PetscMaxThreads; i++) { 1160 ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr); 1161 } 1162 free(PetscThreadPoint); 1163 free(arrmutex); 1164 free(arrcond1); 1165 free(arrcond2); 1166 free(arrstart); 1167 free(arrready); 1168 free(job_main.pdata); 1169 free(pVal); 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MainWait_Main" 1176 void MainWait_Main() { 1177 int i,ierr; 1178 for(i=0; i<PetscMaxThreads; i++) { 1179 ierr = pthread_mutex_lock(job_main.mutexarray[i]); 1180 while(*(job_main.arrThreadReady[i])==PETSC_FALSE) { 1181 ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]); 1182 } 1183 ierr = pthread_mutex_unlock(job_main.mutexarray[i]); 1184 } 1185 } 1186 
1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MainJob_Main" 1189 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) { 1190 int i,ierr; 1191 PetscErrorCode ijoberr = 0; 1192 if(PetscUseThreadPool) { 1193 MainWait(); /* you know everyone is waiting to be signalled! */ 1194 job_main.pfunc = pFunc; 1195 job_main.pdata = data; 1196 for(i=0; i<PetscMaxThreads; i++) { 1197 *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this? suppose you get into MainWait first */ 1198 } 1199 /* tell the threads to go to work */ 1200 for(i=0; i<PetscMaxThreads; i++) { 1201 ierr = pthread_cond_signal(job_main.cond2array[i]); 1202 } 1203 if(pFunc!=FuncFinish) { 1204 MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */ 1205 } 1206 } 1207 else { 1208 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1209 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1210 PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */ 1211 free(apThread); 1212 } 1213 if(ithreaderr) { 1214 ijoberr = ithreaderr; 1215 } 1216 return ijoberr; 1217 } 1218 /**** ****/ 1219 1220 /**** Chain Thread Functions ****/ 1221 void* PetscThreadFunc_Chain(void* arg) { 1222 PetscErrorCode iterr; 1223 int icorr,ierr; 1224 int* pId = (int*)arg; 1225 int ThreadId = *pId; 1226 int SubWorker = ThreadId + 1; 1227 PetscBool PeeOn; 1228 cpu_set_t mset; 1229 //printf("Thread %d In Chain Thread Function\n",ThreadId); 1230 icorr = ThreadCoreAffinity[ThreadId]; 1231 CPU_ZERO(&mset); 1232 CPU_SET(icorr,&mset); 1233 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1234 1235 if(ThreadId==(PetscMaxThreads-1)) { 1236 PeeOn = PETSC_TRUE; 1237 } 1238 else { 1239 PeeOn = PETSC_FALSE; 1240 } 1241 if(PeeOn==PETSC_FALSE) { 1242 /* check your subordinate, wait for him to be ready */ 1243 ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]); 1244 while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) 
{ 1245 /* upon entry, automically releases the lock and blocks 1246 upon return, has the lock */ 1247 ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]); 1248 } 1249 ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]); 1250 /* your subordinate is now ready*/ 1251 } 1252 ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]); 1253 /* update your ready status */ 1254 *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE; 1255 if(ThreadId==0) { 1256 job_chain.eJobStat = JobCompleted; 1257 /* signal main */ 1258 ierr = pthread_cond_signal(&main_cond); 1259 } 1260 else { 1261 /* tell your boss that you're ready to work */ 1262 ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]); 1263 } 1264 /* the while loop needs to have an exit 1265 the 'main' thread can terminate all the threads by performing a broadcast 1266 and calling FuncFinish */ 1267 while(PetscThreadGo) { 1268 /* need to check the condition to ensure we don't have to wait 1269 waiting when you don't have to causes problems 1270 also need to check the condition to ensure proper handling of spurious wakeups */ 1271 while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) { 1272 /*upon entry, automically releases the lock and blocks 1273 upon return, has the lock */ 1274 ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]); 1275 *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE; 1276 *(job_chain.arrThreadReady[ThreadId]) = PETSC_FALSE; 1277 } 1278 if(ThreadId==0) { 1279 job_chain.startJob = PETSC_FALSE; 1280 job_chain.eJobStat = ThreadsWorking; 1281 } 1282 ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]); 1283 if(PeeOn==PETSC_FALSE) { 1284 /* tell your subworker it's time to get to work */ 1285 ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]); 1286 } 1287 /* do your job */ 1288 if(job_chain.pdata==NULL) { 1289 iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata); 1290 } 1291 else { 1292 
iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]); 1293 } 1294 if(iterr!=0) { 1295 ithreaderr = 1; 1296 } 1297 if(PetscThreadGo) { 1298 /* reset job, get ready for more */ 1299 if(PeeOn==PETSC_FALSE) { 1300 /* check your subordinate, wait for him to be ready 1301 how do you know for a fact that your subordinate has actually started? */ 1302 ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]); 1303 while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) { 1304 /* upon entry, automically releases the lock and blocks 1305 upon return, has the lock */ 1306 ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]); 1307 } 1308 ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]); 1309 /* your subordinate is now ready */ 1310 } 1311 ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]); 1312 *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE; 1313 if(ThreadId==0) { 1314 job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! 
*/ 1315 /* root thread (foreman) signals 'main' */ 1316 ierr = pthread_cond_signal(&main_cond); 1317 } 1318 else { 1319 /* signal your boss before you go to sleep */ 1320 ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]); 1321 } 1322 } 1323 } 1324 return NULL; 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "PetscThreadInitialize_Chain" 1329 void* PetscThreadInitialize_Chain(PetscInt N) { 1330 PetscInt i,ierr; 1331 int status; 1332 1333 if(PetscUseThreadPool) { 1334 size_t Val1 = (size_t)CACHE_LINE_SIZE; 1335 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 1336 arrmutex = (char*)memalign(Val1,Val2); 1337 arrcond1 = (char*)memalign(Val1,Val2); 1338 arrcond2 = (char*)memalign(Val1,Val2); 1339 arrstart = (char*)memalign(Val1,Val2); 1340 arrready = (char*)memalign(Val1,Val2); 1341 job_chain.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 1342 job_chain.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1343 job_chain.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1344 job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1345 job_chain.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1346 /* initialize job structure */ 1347 for(i=0; i<PetscMaxThreads; i++) { 1348 job_chain.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 1349 job_chain.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 1350 job_chain.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 1351 job_chain.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i); 1352 job_chain.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 1353 } 1354 for(i=0; i<PetscMaxThreads; i++) { 1355 ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL); 1356 ierr = pthread_cond_init(job_chain.cond1array[i],NULL); 1357 ierr = pthread_cond_init(job_chain.cond2array[i],NULL); 1358 
*(job_chain.arrThreadStarted[i]) = PETSC_FALSE; 1359 *(job_chain.arrThreadReady[i]) = PETSC_FALSE; 1360 } 1361 job_chain.pfunc = NULL; 1362 job_chain.pdata = (void**)malloc(N*sizeof(void*)); 1363 job_chain.startJob = PETSC_FALSE; 1364 job_chain.eJobStat = JobInitiated; 1365 pVal = (int*)malloc(N*sizeof(int)); 1366 /* allocate memory in the heap for the thread structure */ 1367 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1368 /* create threads */ 1369 for(i=0; i<N; i++) { 1370 pVal[i] = i; 1371 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1372 /* should check error */ 1373 } 1374 } 1375 else { 1376 } 1377 return NULL; 1378 } 1379 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "PetscThreadFinalize_Chain" 1383 PetscErrorCode PetscThreadFinalize_Chain() { 1384 int i,ierr; 1385 void* jstatus; 1386 1387 PetscFunctionBegin; 1388 1389 if(PetscUseThreadPool) { 1390 MainJob(FuncFinish,NULL,PetscMaxThreads); /* set up job and broadcast work */ 1391 /* join the threads */ 1392 for(i=0; i<PetscMaxThreads; i++) { 1393 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1394 /* should check error */ 1395 } 1396 free(PetscThreadPoint); 1397 free(arrmutex); 1398 free(arrcond1); 1399 free(arrcond2); 1400 free(arrstart); 1401 free(arrready); 1402 free(job_chain.pdata); 1403 free(pVal); 1404 } 1405 else { 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MainWait_Chain" 1412 void MainWait_Chain() { 1413 int ierr; 1414 ierr = pthread_mutex_lock(job_chain.mutexarray[0]); 1415 while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) { 1416 ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]); 1417 } 1418 ierr = pthread_mutex_unlock(job_chain.mutexarray[0]); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MainJob_Chain" 1423 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) { 1424 int i,ierr; 1425 PetscErrorCode ijoberr = 0; 1426 
if(PetscUseThreadPool) { 1427 MainWait(); 1428 job_chain.pfunc = pFunc; 1429 job_chain.pdata = data; 1430 job_chain.startJob = PETSC_TRUE; 1431 for(i=0; i<PetscMaxThreads; i++) { 1432 *(job_chain.arrThreadStarted[i]) = PETSC_FALSE; 1433 } 1434 job_chain.eJobStat = JobInitiated; 1435 ierr = pthread_cond_signal(job_chain.cond2array[0]); 1436 if(pFunc!=FuncFinish) { 1437 MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */ 1438 } 1439 } 1440 else { 1441 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1442 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1443 PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */ 1444 free(apThread); 1445 } 1446 if(ithreaderr) { 1447 ijoberr = ithreaderr; 1448 } 1449 return ijoberr; 1450 } 1451 /**** ****/ 1452 1453 #if defined(PETSC_HAVE_PTHREAD_BARRIER) 1454 /**** True Thread Functions ****/ 1455 void* PetscThreadFunc_True(void* arg) { 1456 int icorr,ierr,iVal; 1457 int* pId = (int*)arg; 1458 int ThreadId = *pId; 1459 PetscErrorCode iterr; 1460 cpu_set_t mset; 1461 //printf("Thread %d In True Pool Thread Function\n",ThreadId); 1462 icorr = ThreadCoreAffinity[ThreadId]; 1463 CPU_ZERO(&mset); 1464 CPU_SET(icorr,&mset); 1465 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1466 1467 ierr = pthread_mutex_lock(&job_true.mutex); 1468 job_true.iNumReadyThreads++; 1469 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1470 ierr = pthread_cond_signal(&main_cond); 1471 } 1472 /*the while loop needs to have an exit 1473 the 'main' thread can terminate all the threads by performing a broadcast 1474 and calling FuncFinish */ 1475 while(PetscThreadGo) { 1476 /*need to check the condition to ensure we don't have to wait 1477 waiting when you don't have to causes problems 1478 also need to wait if another thread sneaks in and messes with the predicate */ 1479 while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) { 
1480 /* upon entry, automically releases the lock and blocks 1481 upon return, has the lock */ 1482 ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex); 1483 } 1484 job_true.startJob = PETSC_FALSE; 1485 job_true.iNumJobThreads--; 1486 job_true.iNumReadyThreads--; 1487 iVal = PetscMaxThreads-job_true.iNumReadyThreads-1; 1488 pthread_mutex_unlock(&job_true.mutex); 1489 if(job_true.pdata==NULL) { 1490 iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata); 1491 } 1492 else { 1493 iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]); 1494 } 1495 if(iterr!=0) { 1496 ithreaderr = 1; 1497 } 1498 /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads 1499 what happens if a thread finishes before they all start? BAD! 1500 what happens if a thread finishes before any else start? BAD! */ 1501 pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */ 1502 /* reset job */ 1503 if(PetscThreadGo) { 1504 pthread_mutex_lock(&job_true.mutex); 1505 job_true.iNumReadyThreads++; 1506 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1507 /* signal the 'main' thread that the job is done! (only done once) */ 1508 ierr = pthread_cond_signal(&main_cond); 1509 } 1510 } 1511 } 1512 return NULL; 1513 } 1514 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "PetscThreadInitialize_True" 1517 void* PetscThreadInitialize_True(PetscInt N) { 1518 PetscInt i; 1519 int status; 1520 1521 if(PetscUseThreadPool) { 1522 pVal = (int*)malloc(N*sizeof(int)); 1523 /* allocate memory in the heap for the thread structure */ 1524 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1525 BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! 
*/ 1526 job_true.pdata = (void**)malloc(N*sizeof(void*)); 1527 for(i=0; i<N; i++) { 1528 pVal[i] = i; 1529 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1530 /* error check to ensure proper thread creation */ 1531 status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1); 1532 /* should check error */ 1533 } 1534 } 1535 else { 1536 } 1537 return NULL; 1538 } 1539 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "PetscThreadFinalize_True" 1543 PetscErrorCode PetscThreadFinalize_True() { 1544 int i,ierr; 1545 void* jstatus; 1546 1547 PetscFunctionBegin; 1548 1549 if(PetscUseThreadPool) { 1550 MainJob(FuncFinish,NULL,PetscMaxThreads); /* set up job and broadcast work */ 1551 /* join the threads */ 1552 for(i=0; i<PetscMaxThreads; i++) { 1553 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1554 /* should check error */ 1555 } 1556 free(BarrPoint); 1557 free(PetscThreadPoint); 1558 } 1559 else { 1560 } 1561 PetscFunctionReturn(0); 1562 } 1563 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MainWait_True" 1566 void MainWait_True() { 1567 int ierr; 1568 while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) { 1569 ierr = pthread_cond_wait(&main_cond,&job_true.mutex); 1570 } 1571 ierr = pthread_mutex_unlock(&job_true.mutex); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MainJob_True" 1576 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) { 1577 int ierr; 1578 PetscErrorCode ijoberr = 0; 1579 if(PetscUseThreadPool) { 1580 MainWait(); 1581 job_true.pfunc = pFunc; 1582 job_true.pdata = data; 1583 job_true.pbarr = &BarrPoint[n]; 1584 job_true.iNumJobThreads = n; 1585 job_true.startJob = PETSC_TRUE; 1586 ierr = pthread_cond_broadcast(&job_true.cond); 1587 if(pFunc!=FuncFinish) { 1588 MainWait(); /* why wait after? 
guarantees that job gets done */ 1589 } 1590 } 1591 else { 1592 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1593 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1594 PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */ 1595 free(apThread); 1596 } 1597 if(ithreaderr) { 1598 ijoberr = ithreaderr; 1599 } 1600 return ijoberr; 1601 } 1602 /**** ****/ 1603 #endif 1604 1605 void* FuncFinish(void* arg) { 1606 PetscThreadGo = PETSC_FALSE; 1607 return(0); 1608 } 1609 1610 #endif 1611