/*

   This file defines part of the initialization of PETSc

   This file uses regular malloc and free because it cannot know
   what malloc is being used until it has already processed the input.
*/

#define _GNU_SOURCE          /* needed for sched_setaffinity()/cpu_set_t below */
#include <sched.h>
#include <petscsys.h>        /*I  "petscsys.h"  I*/
#if defined(PETSC_USE_PTHREAD)
#include <pthread.h>
#endif
#if defined(PETSC_HAVE_SYS_SYSINFO_H)
#include <sys/sysinfo.h>
#endif
#include <unistd.h>
#if defined(PETSC_HAVE_STDLIB_H)
#include <stdlib.h>
#endif
#if defined(PETSC_HAVE_MALLOC_H)
#include <malloc.h>
#endif
#if defined(PETSC_HAVE_VALGRIND)
#include <valgrind/valgrind.h>
#endif

/* ------------------------Nasty global variables -------------------------------*/
/*
   Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool   PetscBeganMPI         = PETSC_FALSE;
PetscBool   PetscInitializeCalled = PETSC_FALSE;
PetscBool   PetscFinalizeCalled   = PETSC_FALSE;
PetscBool   PetscUseThreadPool    = PETSC_FALSE;
PetscBool   PetscThreadGo         = PETSC_TRUE;   /* cleared to terminate pool worker threads */
PetscMPIInt PetscGlobalRank       = -1;
PetscMPIInt PetscGlobalSize       = -1;

#if defined(PETSC_USE_PTHREAD_CLASSES)
/* Default maximum number of worker threads; may be overridden by -thread_max */
PetscMPIInt PetscMaxThreads = 2;
pthread_t*  PetscThreadPoint;
#define PETSC_HAVE_PTHREAD_BARRIER
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
#endif
PetscErrorCode ithreaderr = 0;  /* set nonzero by any worker whose job function fails */
int* pVal;

#define CACHE_LINE_SIZE 64      /* used by 'chain', 'main','tree' thread pools */
int* ThreadCoreAffinity;        /* core number each worker thread is pinned to */

/* job progress states, used by 'chain','tree' thread pool */
typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;

/* Shared state for the 'tree' thread pool: per-thread mutex/condvar pairs plus
   the job function/data and per-thread started/ready flags. */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;      /* worker -> boss: "I am ready" */
  pthread_cond_t**  cond2array;      /* boss -> worker: "start working" */
  void* (*pfunc)(void*);             /* job function */
  void** pdata;                      /* per-thread job data (may be NULL) */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* Shared state for the 'main' thread pool (main thread signals each worker directly) */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* Shared state for the 'chain' thread pool (each worker signals the next in a chain) */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
/* Shared state for the 'true' thread pool (single mutex/cond plus a barrier) */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
#endif

pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER; /* used by 'true', 'chain','tree' thread pools */
/* raw storage backing the per-thread mutex/cond/flag arrays, padded to cache lines */
char* arrmutex; /* used by 'chain','main','tree' thread pools */
char* arrcond1; /* used by 'chain','main','tree' thread pools */
char* arrcond2; /* used by 'chain','main','tree' thread pools */
char* arrstart; /* used by 'chain','main','tree' thread pools */
char* arrready; /* used by 'chain','main','tree' thread pools */

/* Function Pointers: dispatch to the selected thread-pool implementation */
void* (*PetscThreadFunc)(void*) = NULL;
void* (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void* PetscThreadFunc_Tree(void*);
void* PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void* PetscThreadFunc_Main(void*);
void* PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void* PetscThreadFunc_Chain(void*);
void* PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void* PetscThreadFunc_True(void*);
void* PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** ****/
PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);

void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
#endif

#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
MPI_Datatype MPI_C_DOUBLE_COMPLEX;
MPI_Datatype MPI_C_COMPLEX;
#endif
PetscScalar PETSC_i;
#else
PetscScalar PETSC_i = 0.0;
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype MPIU___FLOAT128 = 0;
#endif
MPI_Datatype MPIU_2SCALAR = 0;
MPI_Datatype MPIU_2INT = 0;

/*
   These are needed by petscbt.h
*/
#include <petscbt.h>
char _BT_mask = ' ';
char _BT_c = ' ';
PetscInt _BT_idx = 0;

/*
   Function that is called to display all error messages
*/
PetscErrorCode (*PetscErrorPrintf)(const char [],...)
= PetscErrorPrintfDefault; 179 PetscErrorCode (*PetscHelpPrintf)(MPI_Comm,const char [],...) = PetscHelpPrintfDefault; 180 #if defined(PETSC_HAVE_MATLAB_ENGINE) 181 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintf_Matlab; 182 #else 183 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintfDefault; 184 #endif 185 /* 186 This is needed to turn on/off cusp synchronization */ 187 PetscBool synchronizeCUSP = PETSC_FALSE; 188 189 /* ------------------------------------------------------------------------------*/ 190 /* 191 Optional file where all PETSc output from various prints is saved 192 */ 193 FILE *petsc_history = PETSC_NULL; 194 195 #undef __FUNCT__ 196 #define __FUNCT__ "PetscOpenHistoryFile" 197 PetscErrorCode PetscOpenHistoryFile(const char filename[],FILE **fd) 198 { 199 PetscErrorCode ierr; 200 PetscMPIInt rank,size; 201 char pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64]; 202 char version[256]; 203 204 PetscFunctionBegin; 205 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 206 if (!rank) { 207 char arch[10]; 208 int err; 209 PetscViewer viewer; 210 211 ierr = PetscGetArchType(arch,10);CHKERRQ(ierr); 212 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 213 ierr = PetscGetVersion(version,256);CHKERRQ(ierr); 214 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 215 if (filename) { 216 ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr); 217 } else { 218 ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr); 219 ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr); 220 ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr); 221 } 222 223 *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname); 224 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 225 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr); 226 ierr = 
PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr); 227 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr); 228 ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr); 229 ierr = PetscOptionsView(viewer);CHKERRQ(ierr); 230 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 231 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 232 err = fflush(*fd); 233 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "PetscCloseHistoryFile" 240 PetscErrorCode PetscCloseHistoryFile(FILE **fd) 241 { 242 PetscErrorCode ierr; 243 PetscMPIInt rank; 244 char date[64]; 245 int err; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 249 if (!rank) { 250 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 251 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 252 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr); 253 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 254 err = fflush(*fd); 255 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 256 err = fclose(*fd); 257 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file"); 258 } 259 PetscFunctionReturn(0); 260 } 261 262 /* ------------------------------------------------------------------------------*/ 263 264 /* 265 This is ugly and probably belongs somewhere else, but I want to 266 be able to put a true MPI abort error handler with command line args. 267 268 This is so MPI errors in the debugger will leave all the stack 269 frames. 
The default MP_Abort() cleans up and exits thus providing no useful information
  in the debugger hence we call abort() instead of MPI_Abort().
*/

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/* MPI error handler that prints the error code and calls abort() (not
   MPI_Abort()) so a debugger retains the full stack. */
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();
}

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/* MPI error handler that tries to attach a debugger; falls back to
   MPI_Abort() if the debugger cannot be attached. */
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}

#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode PetscEnd(void)
{
  PetscFunctionBegin;
  PetscFinalize();
  exit(0);
  return 0; /* never reached; keeps compilers quiet about the return type */
}

PetscBool PetscOptionsPublish = PETSC_FALSE;
extern PetscErrorCode PetscSetUseTrMalloc_Private(void);
extern PetscBool petscsetmallocvisited;
/* machine name captured from -on_error_emacs, handed to the emacs error handler */
static char emacsmachinename[256];

/* optional hooks a higher-level package registers to print its own
   version/help text before PETSc's (see PetscSetHelpVersionFunctions()) */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm) = 0;

#undef __FUNCT__
#define __FUNCT__ "PetscSetHelpVersionFunctions"
/*@C
   PetscSetHelpVersionFunctions - Sets functions that print help and version information
   before the PETSc help and version information is printed.
Must call BEFORE PetscInitialize().
   This routine enables a "higher-level" package that uses PETSc to print its messages first.

   Input Parameter:
+ help - the help function (may be PETSC_NULL)
- version - the version function (may be PETSC_NULL)

   Level: developer

   Concepts: package help message

@*/
PetscErrorCode PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
{
  PetscFunctionBegin;
  PetscExternalHelpFunction    = help;
  PetscExternalVersionFunction = version;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscOptionsCheckInitial_Private"
/*
   PetscOptionsCheckInitial_Private - Processes the "early" options database
   entries during PetscInitialize(): malloc tracing/debugging, version/help
   printing, error handlers, debugger attachment, sockets/web server,
   logging/history, thread-pool selection, and -petsc_sleep.
*/
PetscErrorCode PetscOptionsCheckInitial_Private(void)
{
  char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
  PetscErrorCode ierr;
  PetscReal      si;
  int            i;
  PetscMPIInt    rank;
  char           version[256];

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  /*
      Setup the memory management; support for tracing malloc() usage
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
  /* debug builds: tracing malloc is ON unless explicitly disabled with -malloc no */
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
  if ((!flg2 || flg1) && !petscsetmallocvisited) {
#if defined(PETSC_HAVE_VALGRIND)
    if (flg2 || !(RUNNING_ON_VALGRIND)) {
      /* turn off default -malloc if valgrind is being used */
#endif
      ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
#if defined(PETSC_HAVE_VALGRIND)
    }
#endif
  }
#else
  /* optimized builds: tracing malloc only when explicitly requested */
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
  if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
#endif
  if (flg3) {
    ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
  }
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {
    ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
    ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
  }

  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (!flg1) {
    flg1 = PETSC_FALSE;
    ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
  }
  if (flg1) {
    ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
  }

  /*
      Set the display variable for graphics
  */
  ierr = PetscSetDisplay();CHKERRQ(ierr);


  /*
      Print the PETSc version information
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
  if (flg1 || flg2 || flg3){

    /*
       Print "higher-level" package version message
    */
    if (PetscExternalVersionFunction) {
      ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
    }

    ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
------------------------------\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
------------------------------\n");CHKERRQ(ierr);
  }

  /*
      Print "higher-level" package help message
  */
  if (flg3){
    if (PetscExternalHelpFunction) {
      ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
    }
  }

  /*
      Setup the error handling
  */
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {
    ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
  }
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}

  /*
      Setup debugger information
  */
  ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
  ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
  if (flg1) {
    MPI_Errhandler err_handler;

    ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
    ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
    ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
    ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
  }
  ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
  if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
  ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
  if (flg1 || flg2) {
    PetscMPIInt    size;
    PetscInt       lsize,*nodes;
    MPI_Errhandler err_handler;
    /*
       we have to make sure that all processors have opened
       connections to all other processors, otherwise once the
       debugger has stated it is likely to receive a SIGUSR1
       and kill the program.
    */
    ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
    if (size > 2) {
      PetscMPIInt dummy = 0;
      MPI_Status  status;
      for (i=0; i<size; i++) {
        if (rank != i) {
          ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
        }
      }
      for (i=0; i<size; i++) {
        if (rank != i) {
          ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
        }
      }
    }
    /* check if this processor node should be in debugger */
    ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
    lsize = size;
    ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
    if (flag) {
      for (i=0; i<lsize; i++) {
        if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
      }
    }
    if (!flag) {
      ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
      ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
      if (flg1) {
        ierr = PetscAttachDebugger();CHKERRQ(ierr);
      } else {
        ierr = PetscStopForDebugger();CHKERRQ(ierr);
      }
      ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
      ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
    }
    ierr = PetscFree(nodes);CHKERRQ(ierr);
  }

  /* NOTE(review): emacsmachinename is 256 bytes but only 128 are allowed here -- confirm intended */
  ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
  if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}

#if defined(PETSC_USE_SOCKET_VIEWER)
  /*
    Activates new sockets for zope if needed
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
  if (flgz){
    int  sockfd;
    char hostname[256];
    char username[256];
    int  remoteport = 9999;

    ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
    if (!hostname[0]){
      ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
    }
    ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
    ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
    PETSC_ZOPEFD = fdopen(sockfd, "w");
    if (flgzout){
      PETSC_STDOUT = PETSC_ZOPEFD;
      fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
      fprintf(PETSC_STDOUT, "<<<start>>>");
    } else {
      fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
      fprintf(PETSC_ZOPEFD, "<<<start>>>");
    }
  }
#endif
#if defined(PETSC_USE_SERVER)
  ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
  if (flgz){
    PetscInt port = PETSC_DECIDE;
    ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
  }
#endif

  /*
      Setup profiling and logging
  */
#if defined (PETSC_USE_INFO)
  {
    char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
    ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
    if (flg1 && logname[0]) {
      ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
    } else if (flg1) {
      ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
    }
  }
#endif
#if defined(PETSC_USE_LOG)
  mname[0] = 0;
  ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
  if (flg1) {
    if (mname[0]) {
      ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
    } else {
      ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_HAVE_MPE)
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
  if (flg1) PetscLogMPEBegin();
#endif
  flg1 = PETSC_FALSE;
  flg2 = PETSC_FALSE;
  flg3 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
  if (flg1) { ierr = PetscLogAllBegin();CHKERRQ(ierr); }
  else if (flg2 || flg3 || flg4) { ierr = PetscLogBegin();CHKERRQ(ierr);}

  ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
  if (flg1) {
    char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
    FILE *file;
    if (mname[0]) {
      /* per-rank trace file: <mname>.<rank> */
      sprintf(name,"%s.%d",mname,rank);
      ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
      file = fopen(fname,"w");
      if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
    } else {
      file = PETSC_STDOUT;
    }
    ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
  }
#endif

  /*
      Setup building of stack frames for all function calls
  */
#if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
  ierr = PetscStackCreate();CHKERRQ(ierr);
#endif

  ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);

#if defined(PETSC_USE_PTHREAD_CLASSES)
  /*
      Determine whether user specified maximum number of threads
  */
  ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);

  /*
      Determine whether to use thread pool
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
  if (flg1) {
    PetscUseThreadPool = PETSC_TRUE;
    PetscInt N_CORES = get_nprocs();
    /* NOTE(review): sized by core count but indexed below by thread index up to
       PetscMaxThreads-1; overruns if -thread_max exceeds N_CORES -- confirm */
    ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
    /* NOTE(review): tbuf[2]/tstr[9] overflow once i >= 10 (two-digit thread index
       plus NUL does not fit); also no malloc NULL check above -- confirm limits */
    char tstr[9];
    char tbuf[2];
    strcpy(tstr,"-thread");
    for(i=0;i<PetscMaxThreads;i++) {
      ThreadCoreAffinity[i] = i;
      sprintf(tbuf,"%d",i);
      strcat(tstr,tbuf);          /* builds option name "-thread<i>" */
      ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
      if(flg1) {
        ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
        ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
      }
      tstr[7] = '\0';             /* truncate back to "-thread" for the next index */
    }
    /* get the thread pool type */
    PetscInt ipool = 0;
    const char *choices[4] = {"true","tree","main","chain"};

    ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
    switch(ipool) {
    case 1:
      PetscThreadFunc       = &PetscThreadFunc_Tree;
      PetscThreadInitialize = &PetscThreadInitialize_Tree;
      PetscThreadFinalize   = &PetscThreadFinalize_Tree;
      MainWait              = &MainWait_Tree;
      MainJob               = &MainJob_Tree;
      PetscInfo(PETSC_NULL,"Using tree thread pool\n");
      break;
    case 2:
      PetscThreadFunc       = &PetscThreadFunc_Main;
      PetscThreadInitialize = &PetscThreadInitialize_Main;
      PetscThreadFinalize   = &PetscThreadFinalize_Main;
      MainWait              = &MainWait_Main;
      MainJob               = &MainJob_Main;
      PetscInfo(PETSC_NULL,"Using main thread pool\n");
      break;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
    case 3:
#else
    default:
#endif
      PetscThreadFunc       = &PetscThreadFunc_Chain;
      PetscThreadInitialize = &PetscThreadInitialize_Chain;
      PetscThreadFinalize   = &PetscThreadFinalize_Chain;
      MainWait              = &MainWait_Chain;
      MainJob               = &MainJob_Chain;
      PetscInfo(PETSC_NULL,"Using chain thread pool\n");
      break;
#if defined(PETSC_HAVE_PTHREAD_BARRIER)
    default:
      /* 'true' pool (choice 0) needs pthread barriers */
      PetscThreadFunc       = &PetscThreadFunc_True;
      PetscThreadInitialize = &PetscThreadInitialize_True;
      PetscThreadFinalize   = &PetscThreadFinalize_True;
      MainWait              = &MainWait_True;
      MainJob               = &MainJob_True;
      PetscInfo(PETSC_NULL,"Using true thread pool\n");
      break;
#endif
    }
    PetscThreadInitialize(PetscMaxThreads);
  } else {
    //need to define these in the case on 'no threads' or 'thread create/destroy'
    //could take any of the above versions
    MainJob = &MainJob_Spawn;
  }
#endif
  /*
      Print basic help message
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
  if (flg1) {
    ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," only when run in the debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," start the debugger in new xterm\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," unless noxterm is given\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," start all processes in the debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," emacs jumps to error file\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," waits the delay for you to attach\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
#if defined(PETSC_USE_LOG)
    ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
#if defined(PETSC_HAVE_MPE)
    ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
#endif
    ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
#endif
    ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
  }

  ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
  if (flg1) {
    ierr = PetscSleep(si);CHKERRQ(ierr);
  }

  ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
  ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
  if (f) {
    ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
  }

#if defined(PETSC_HAVE_CUSP)
  /* -cusp_synchronize defaults on when -log_summary was given */
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
  if (flg3) flg1 = PETSC_TRUE;
  else flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) synchronizeCUSP = PETSC_TRUE;
#endif

  PetscFunctionReturn(0);
}

#if defined(PETSC_USE_PTHREAD_CLASSES)

/**** 'Tree' Thread Pool Functions ****/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;
  cpu_set_t mset;
  //printf("Thread %d In Tree Thread Function\n",ThreadId);
  /* pin this worker to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* leaf threads (no children in the binary tree) "pee on" -- they have no subordinates */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr =
pthread_mutex_lock(job_tree.mutexarray[SubWorker]); 828 while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) { 829 /* upon entry, automically releases the lock and blocks 830 upon return, has the lock */ 831 ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 832 } 833 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 834 } 835 } 836 /* your subordinates are now ready */ 837 } 838 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 839 /* update your ready status */ 840 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 841 if(ThreadId==0) { 842 job_tree.eJobStat = JobCompleted; 843 /* ignal main */ 844 ierr = pthread_cond_signal(&main_cond); 845 } 846 else { 847 /* tell your boss that you're ready to work */ 848 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 849 } 850 /* the while loop needs to have an exit 851 the 'main' thread can terminate all the threads by performing a broadcast 852 and calling FuncFinish */ 853 while(PetscThreadGo) { 854 /*need to check the condition to ensure we don't have to wait 855 waiting when you don't have to causes problems 856 also need to check the condition to ensure proper handling of spurious wakeups */ 857 while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) { 858 /* upon entry, automically releases the lock and blocks 859 upon return, has the lock */ 860 ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]); 861 *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE; 862 *(job_tree.arrThreadReady[ThreadId]) = PETSC_FALSE; 863 } 864 if(ThreadId==0) { 865 job_tree.startJob = PETSC_FALSE; 866 job_tree.eJobStat = ThreadsWorking; 867 } 868 ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]); 869 if(PeeOn==PETSC_FALSE) { 870 /* tell your subordinates it's time to get to work */ 871 for(i=1; i<=Mary; i++) { 872 SubWorker = Mary*ThreadId+i; 873 if(SubWorker<PetscMaxThreads) { 874 ierr = 
pthread_cond_signal(job_tree.cond2array[SubWorker]); 875 } 876 } 877 } 878 /* do your job */ 879 if(job_tree.pdata==NULL) { 880 iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata); 881 } 882 else { 883 iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]); 884 } 885 if(iterr!=0) { 886 ithreaderr = 1; 887 } 888 if(PetscThreadGo) { 889 /* reset job, get ready for more */ 890 if(PeeOn==PETSC_FALSE) { 891 /* check your subordinates, waiting for them to be ready 892 how do you know for a fact that a given subordinate has actually started? */ 893 for(i=1;i<=Mary;i++) { 894 SubWorker = Mary*ThreadId+i; 895 if(SubWorker<PetscMaxThreads) { 896 ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]); 897 while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) { 898 /* upon entry, automically releases the lock and blocks 899 upon return, has the lock */ 900 ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 901 } 902 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 903 } 904 } 905 /* your subordinates are now ready */ 906 } 907 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 908 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 909 if(ThreadId==0) { 910 job_tree.eJobStat = JobCompleted; /* oot thread: last thread to complete, guaranteed! 
*/ 911 /* root thread signals 'main' */ 912 ierr = pthread_cond_signal(&main_cond); 913 } 914 else { 915 /* signal your boss before you go to sleep */ 916 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 917 } 918 } 919 } 920 return NULL; 921 } 922 923 #undef __FUNCT__ 924 #define __FUNCT__ "PetscThreadInitialize_Tree" 925 void* PetscThreadInitialize_Tree(PetscInt N) { 926 PetscInt i,ierr; 927 int status; 928 929 if(PetscUseThreadPool) { 930 size_t Val1 = (size_t)CACHE_LINE_SIZE; 931 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 932 arrmutex = (char*)memalign(Val1,Val2); 933 arrcond1 = (char*)memalign(Val1,Val2); 934 arrcond2 = (char*)memalign(Val1,Val2); 935 arrstart = (char*)memalign(Val1,Val2); 936 arrready = (char*)memalign(Val1,Val2); 937 job_tree.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 938 job_tree.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 939 job_tree.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 940 job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 941 job_tree.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 942 /* initialize job structure */ 943 for(i=0; i<PetscMaxThreads; i++) { 944 job_tree.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 945 job_tree.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 946 job_tree.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 947 job_tree.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i); 948 job_tree.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 949 } 950 for(i=0; i<PetscMaxThreads; i++) { 951 ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL); 952 ierr = pthread_cond_init(job_tree.cond1array[i],NULL); 953 ierr = pthread_cond_init(job_tree.cond2array[i],NULL); 954 *(job_tree.arrThreadStarted[i]) = PETSC_FALSE; 955 *(job_tree.arrThreadReady[i]) = 
PETSC_FALSE; 956 } 957 job_tree.pfunc = NULL; 958 job_tree.pdata = (void**)malloc(N*sizeof(void*)); 959 job_tree.startJob = PETSC_FALSE; 960 job_tree.eJobStat = JobInitiated; 961 pVal = (int*)malloc(N*sizeof(int)); 962 /* allocate memory in the heap for the thread structure */ 963 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 964 /* create threads */ 965 for(i=0; i<N; i++) { 966 pVal[i] = i; 967 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 968 /* should check status */ 969 } 970 } 971 return NULL; 972 } 973 974 #undef __FUNCT__ 975 #define __FUNCT__ "PetscThreadFinalize_Tree" 976 PetscErrorCode PetscThreadFinalize_Tree() { 977 int i,ierr; 978 void* jstatus; 979 980 PetscFunctionBegin; 981 982 if(PetscUseThreadPool) { 983 MainJob(FuncFinish,NULL,PetscMaxThreads); /* set up job and broadcast work */ 984 /* join the threads */ 985 for(i=0; i<PetscMaxThreads; i++) { 986 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 987 /* do error checking*/ 988 } 989 free(PetscThreadPoint); 990 free(arrmutex); 991 free(arrcond1); 992 free(arrcond2); 993 free(arrstart); 994 free(arrready); 995 free(job_tree.pdata); 996 free(pVal); 997 } 998 else { 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MainWait_Tree" 1005 void MainWait_Tree() { 1006 int ierr; 1007 ierr = pthread_mutex_lock(job_tree.mutexarray[0]); 1008 while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) { 1009 ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]); 1010 } 1011 ierr = pthread_mutex_unlock(job_tree.mutexarray[0]); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MainJob_Tree" 1016 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) { 1017 int i,ierr; 1018 PetscErrorCode ijoberr = 0; 1019 if(PetscUseThreadPool) { 1020 MainWait(); 1021 job_tree.pfunc = pFunc; 1022 job_tree.pdata = data; 1023 job_tree.startJob = PETSC_TRUE; 1024 for(i=0; i<PetscMaxThreads; i++) { 1025 
*(job_tree.arrThreadStarted[i]) = PETSC_FALSE; 1026 } 1027 job_tree.eJobStat = JobInitiated; 1028 ierr = pthread_cond_signal(job_tree.cond2array[0]); 1029 if(pFunc!=FuncFinish) { 1030 MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */ 1031 } 1032 } 1033 else { 1034 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1035 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1036 PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */ 1037 free(apThread); 1038 } 1039 if(ithreaderr) { 1040 ijoberr = ithreaderr; 1041 } 1042 return ijoberr; 1043 } 1044 /**** ****/ 1045 1046 /**** 'Main' Thread Pool Functions ****/ 1047 void* PetscThreadFunc_Main(void* arg) { 1048 PetscErrorCode iterr; 1049 int icorr,ierr; 1050 int* pId = (int*)arg; 1051 int ThreadId = *pId; 1052 cpu_set_t mset; 1053 //printf("Thread %d In Main Thread Function\n",ThreadId); 1054 icorr = ThreadCoreAffinity[ThreadId]; 1055 CPU_ZERO(&mset); 1056 CPU_SET(icorr,&mset); 1057 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1058 1059 ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]); 1060 /* update your ready status */ 1061 *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE; 1062 /* tell the BOSS that you're ready to work before you go to sleep */ 1063 ierr = pthread_cond_signal(job_main.cond1array[ThreadId]); 1064 1065 /* the while loop needs to have an exit 1066 the 'main' thread can terminate all the threads by performing a broadcast 1067 and calling FuncFinish */ 1068 while(PetscThreadGo) { 1069 /* need to check the condition to ensure we don't have to wait 1070 waiting when you don't have to causes problems 1071 also need to check the condition to ensure proper handling of spurious wakeups */ 1072 while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) { 1073 /* upon entry, atomically releases the lock and blocks 1074 upon return, has the lock */ 1075 ierr = 
pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]); 1076 /* (job_main.arrThreadReady[ThreadId]) = PETSC_FALSE; */ 1077 } 1078 ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]); 1079 if(job_main.pdata==NULL) { 1080 iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata); 1081 } 1082 else { 1083 iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]); 1084 } 1085 if(iterr!=0) { 1086 ithreaderr = 1; 1087 } 1088 if(PetscThreadGo) { 1089 /* reset job, get ready for more */ 1090 ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]); 1091 *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE; 1092 /* tell the BOSS that you're ready to work before you go to sleep */ 1093 ierr = pthread_cond_signal(job_main.cond1array[ThreadId]); 1094 } 1095 } 1096 return NULL; 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "PetscThreadInitialize_Main" 1101 void* PetscThreadInitialize_Main(PetscInt N) { 1102 PetscInt i,ierr; 1103 int status; 1104 1105 if(PetscUseThreadPool) { 1106 size_t Val1 = (size_t)CACHE_LINE_SIZE; 1107 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 1108 arrmutex = (char*)memalign(Val1,Val2); 1109 arrcond1 = (char*)memalign(Val1,Val2); 1110 arrcond2 = (char*)memalign(Val1,Val2); 1111 arrstart = (char*)memalign(Val1,Val2); 1112 arrready = (char*)memalign(Val1,Val2); 1113 job_main.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 1114 job_main.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1115 job_main.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1116 job_main.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1117 /* initialize job structure */ 1118 for(i=0; i<PetscMaxThreads; i++) { 1119 job_main.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 1120 job_main.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 1121 job_main.cond2array[i] = 
(pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 1122 job_main.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 1123 } 1124 for(i=0; i<PetscMaxThreads; i++) { 1125 ierr = pthread_mutex_init(job_main.mutexarray[i],NULL); 1126 ierr = pthread_cond_init(job_main.cond1array[i],NULL); 1127 ierr = pthread_cond_init(job_main.cond2array[i],NULL); 1128 *(job_main.arrThreadReady[i]) = PETSC_FALSE; 1129 } 1130 job_main.pfunc = NULL; 1131 job_main.pdata = (void**)malloc(N*sizeof(void*)); 1132 pVal = (int*)malloc(N*sizeof(int)); 1133 /* allocate memory in the heap for the thread structure */ 1134 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1135 /* create threads */ 1136 for(i=0; i<N; i++) { 1137 pVal[i] = i; 1138 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1139 /* error check */ 1140 } 1141 } 1142 else { 1143 } 1144 return NULL; 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "PetscThreadFinalize_Main" 1149 PetscErrorCode PetscThreadFinalize_Main() { 1150 int i,ierr; 1151 void* jstatus; 1152 1153 PetscFunctionBegin; 1154 1155 if(PetscUseThreadPool) { 1156 MainJob(FuncFinish,NULL,PetscMaxThreads); /* set up job and broadcast work */ 1157 /* join the threads */ 1158 for(i=0; i<PetscMaxThreads; i++) { 1159 ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr); 1160 } 1161 free(PetscThreadPoint); 1162 free(arrmutex); 1163 free(arrcond1); 1164 free(arrcond2); 1165 free(arrstart); 1166 free(arrready); 1167 free(job_main.pdata); 1168 free(pVal); 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MainWait_Main" 1175 void MainWait_Main() { 1176 int i,ierr; 1177 for(i=0; i<PetscMaxThreads; i++) { 1178 ierr = pthread_mutex_lock(job_main.mutexarray[i]); 1179 while(*(job_main.arrThreadReady[i])==PETSC_FALSE) { 1180 ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]); 1181 } 1182 ierr = pthread_mutex_unlock(job_main.mutexarray[i]); 1183 } 1184 } 1185 
1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MainJob_Main" 1188 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) { 1189 int i,ierr; 1190 PetscErrorCode ijoberr = 0; 1191 if(PetscUseThreadPool) { 1192 MainWait(); /* you know everyone is waiting to be signalled! */ 1193 job_main.pfunc = pFunc; 1194 job_main.pdata = data; 1195 for(i=0; i<PetscMaxThreads; i++) { 1196 *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this? suppose you get into MainWait first */ 1197 } 1198 /* tell the threads to go to work */ 1199 for(i=0; i<PetscMaxThreads; i++) { 1200 ierr = pthread_cond_signal(job_main.cond2array[i]); 1201 } 1202 if(pFunc!=FuncFinish) { 1203 MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */ 1204 } 1205 } 1206 else { 1207 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1208 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1209 PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */ 1210 free(apThread); 1211 } 1212 if(ithreaderr) { 1213 ijoberr = ithreaderr; 1214 } 1215 return ijoberr; 1216 } 1217 /**** ****/ 1218 1219 /**** Chain Thread Functions ****/ 1220 void* PetscThreadFunc_Chain(void* arg) { 1221 PetscErrorCode iterr; 1222 int icorr,ierr; 1223 int* pId = (int*)arg; 1224 int ThreadId = *pId; 1225 int SubWorker = ThreadId + 1; 1226 PetscBool PeeOn; 1227 cpu_set_t mset; 1228 //printf("Thread %d In Chain Thread Function\n",ThreadId); 1229 icorr = ThreadCoreAffinity[ThreadId]; 1230 CPU_ZERO(&mset); 1231 CPU_SET(icorr,&mset); 1232 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1233 1234 if(ThreadId==(PetscMaxThreads-1)) { 1235 PeeOn = PETSC_TRUE; 1236 } 1237 else { 1238 PeeOn = PETSC_FALSE; 1239 } 1240 if(PeeOn==PETSC_FALSE) { 1241 /* check your subordinate, wait for him to be ready */ 1242 ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]); 1243 while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) 
{ 1244 /* upon entry, automically releases the lock and blocks 1245 upon return, has the lock */ 1246 ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]); 1247 } 1248 ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]); 1249 /* your subordinate is now ready*/ 1250 } 1251 ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]); 1252 /* update your ready status */ 1253 *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE; 1254 if(ThreadId==0) { 1255 job_chain.eJobStat = JobCompleted; 1256 /* signal main */ 1257 ierr = pthread_cond_signal(&main_cond); 1258 } 1259 else { 1260 /* tell your boss that you're ready to work */ 1261 ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]); 1262 } 1263 /* the while loop needs to have an exit 1264 the 'main' thread can terminate all the threads by performing a broadcast 1265 and calling FuncFinish */ 1266 while(PetscThreadGo) { 1267 /* need to check the condition to ensure we don't have to wait 1268 waiting when you don't have to causes problems 1269 also need to check the condition to ensure proper handling of spurious wakeups */ 1270 while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) { 1271 /*upon entry, automically releases the lock and blocks 1272 upon return, has the lock */ 1273 ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]); 1274 *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE; 1275 *(job_chain.arrThreadReady[ThreadId]) = PETSC_FALSE; 1276 } 1277 if(ThreadId==0) { 1278 job_chain.startJob = PETSC_FALSE; 1279 job_chain.eJobStat = ThreadsWorking; 1280 } 1281 ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]); 1282 if(PeeOn==PETSC_FALSE) { 1283 /* tell your subworker it's time to get to work */ 1284 ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]); 1285 } 1286 /* do your job */ 1287 if(job_chain.pdata==NULL) { 1288 iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata); 1289 } 1290 else { 1291 
iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]); 1292 } 1293 if(iterr!=0) { 1294 ithreaderr = 1; 1295 } 1296 if(PetscThreadGo) { 1297 /* reset job, get ready for more */ 1298 if(PeeOn==PETSC_FALSE) { 1299 /* check your subordinate, wait for him to be ready 1300 how do you know for a fact that your subordinate has actually started? */ 1301 ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]); 1302 while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) { 1303 /* upon entry, automically releases the lock and blocks 1304 upon return, has the lock */ 1305 ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]); 1306 } 1307 ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]); 1308 /* your subordinate is now ready */ 1309 } 1310 ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]); 1311 *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE; 1312 if(ThreadId==0) { 1313 job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! 
*/ 1314 /* root thread (foreman) signals 'main' */ 1315 ierr = pthread_cond_signal(&main_cond); 1316 } 1317 else { 1318 /* signal your boss before you go to sleep */ 1319 ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]); 1320 } 1321 } 1322 } 1323 return NULL; 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "PetscThreadInitialize_Chain" 1328 void* PetscThreadInitialize_Chain(PetscInt N) { 1329 PetscInt i,ierr; 1330 int status; 1331 1332 if(PetscUseThreadPool) { 1333 size_t Val1 = (size_t)CACHE_LINE_SIZE; 1334 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 1335 arrmutex = (char*)memalign(Val1,Val2); 1336 arrcond1 = (char*)memalign(Val1,Val2); 1337 arrcond2 = (char*)memalign(Val1,Val2); 1338 arrstart = (char*)memalign(Val1,Val2); 1339 arrready = (char*)memalign(Val1,Val2); 1340 job_chain.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 1341 job_chain.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1342 job_chain.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1343 job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1344 job_chain.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1345 /* initialize job structure */ 1346 for(i=0; i<PetscMaxThreads; i++) { 1347 job_chain.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 1348 job_chain.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 1349 job_chain.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 1350 job_chain.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i); 1351 job_chain.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 1352 } 1353 for(i=0; i<PetscMaxThreads; i++) { 1354 ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL); 1355 ierr = pthread_cond_init(job_chain.cond1array[i],NULL); 1356 ierr = pthread_cond_init(job_chain.cond2array[i],NULL); 1357 
*(job_chain.arrThreadStarted[i]) = PETSC_FALSE; 1358 *(job_chain.arrThreadReady[i]) = PETSC_FALSE; 1359 } 1360 job_chain.pfunc = NULL; 1361 job_chain.pdata = (void**)malloc(N*sizeof(void*)); 1362 job_chain.startJob = PETSC_FALSE; 1363 job_chain.eJobStat = JobInitiated; 1364 pVal = (int*)malloc(N*sizeof(int)); 1365 /* allocate memory in the heap for the thread structure */ 1366 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1367 /* create threads */ 1368 for(i=0; i<N; i++) { 1369 pVal[i] = i; 1370 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1371 /* should check error */ 1372 } 1373 } 1374 else { 1375 } 1376 return NULL; 1377 } 1378 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "PetscThreadFinalize_Chain" 1382 PetscErrorCode PetscThreadFinalize_Chain() { 1383 int i,ierr; 1384 void* jstatus; 1385 1386 PetscFunctionBegin; 1387 1388 if(PetscUseThreadPool) { 1389 MainJob(FuncFinish,NULL,PetscMaxThreads); /* set up job and broadcast work */ 1390 /* join the threads */ 1391 for(i=0; i<PetscMaxThreads; i++) { 1392 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1393 /* should check error */ 1394 } 1395 free(PetscThreadPoint); 1396 free(arrmutex); 1397 free(arrcond1); 1398 free(arrcond2); 1399 free(arrstart); 1400 free(arrready); 1401 free(job_chain.pdata); 1402 free(pVal); 1403 } 1404 else { 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MainWait_Chain" 1411 void MainWait_Chain() { 1412 int ierr; 1413 ierr = pthread_mutex_lock(job_chain.mutexarray[0]); 1414 while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) { 1415 ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]); 1416 } 1417 ierr = pthread_mutex_unlock(job_chain.mutexarray[0]); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MainJob_Chain" 1422 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) { 1423 int i,ierr; 1424 PetscErrorCode ijoberr = 0; 1425 
if(PetscUseThreadPool) { 1426 MainWait(); 1427 job_chain.pfunc = pFunc; 1428 job_chain.pdata = data; 1429 job_chain.startJob = PETSC_TRUE; 1430 for(i=0; i<PetscMaxThreads; i++) { 1431 *(job_chain.arrThreadStarted[i]) = PETSC_FALSE; 1432 } 1433 job_chain.eJobStat = JobInitiated; 1434 ierr = pthread_cond_signal(job_chain.cond2array[0]); 1435 if(pFunc!=FuncFinish) { 1436 MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */ 1437 } 1438 } 1439 else { 1440 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1441 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1442 PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */ 1443 free(apThread); 1444 } 1445 if(ithreaderr) { 1446 ijoberr = ithreaderr; 1447 } 1448 return ijoberr; 1449 } 1450 /**** ****/ 1451 1452 #if defined(PETSC_HAVE_PTHREAD_BARRIER) 1453 /**** True Thread Functions ****/ 1454 void* PetscThreadFunc_True(void* arg) { 1455 int icorr,ierr,iVal; 1456 int* pId = (int*)arg; 1457 int ThreadId = *pId; 1458 PetscErrorCode iterr; 1459 cpu_set_t mset; 1460 //printf("Thread %d In True Pool Thread Function\n",ThreadId); 1461 icorr = ThreadCoreAffinity[ThreadId]; 1462 CPU_ZERO(&mset); 1463 CPU_SET(icorr,&mset); 1464 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1465 1466 ierr = pthread_mutex_lock(&job_true.mutex); 1467 job_true.iNumReadyThreads++; 1468 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1469 ierr = pthread_cond_signal(&main_cond); 1470 } 1471 /*the while loop needs to have an exit 1472 the 'main' thread can terminate all the threads by performing a broadcast 1473 and calling FuncFinish */ 1474 while(PetscThreadGo) { 1475 /*need to check the condition to ensure we don't have to wait 1476 waiting when you don't have to causes problems 1477 also need to wait if another thread sneaks in and messes with the predicate */ 1478 while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) { 
1479 /* upon entry, automically releases the lock and blocks 1480 upon return, has the lock */ 1481 ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex); 1482 } 1483 job_true.startJob = PETSC_FALSE; 1484 job_true.iNumJobThreads--; 1485 job_true.iNumReadyThreads--; 1486 iVal = PetscMaxThreads-job_true.iNumReadyThreads-1; 1487 pthread_mutex_unlock(&job_true.mutex); 1488 if(job_true.pdata==NULL) { 1489 iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata); 1490 } 1491 else { 1492 iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]); 1493 } 1494 if(iterr!=0) { 1495 ithreaderr = 1; 1496 } 1497 /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads 1498 what happens if a thread finishes before they all start? BAD! 1499 what happens if a thread finishes before any else start? BAD! */ 1500 pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */ 1501 /* reset job */ 1502 if(PetscThreadGo) { 1503 pthread_mutex_lock(&job_true.mutex); 1504 job_true.iNumReadyThreads++; 1505 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1506 /* signal the 'main' thread that the job is done! (only done once) */ 1507 ierr = pthread_cond_signal(&main_cond); 1508 } 1509 } 1510 } 1511 return NULL; 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "PetscThreadInitialize_True" 1516 void* PetscThreadInitialize_True(PetscInt N) { 1517 PetscInt i; 1518 int status; 1519 1520 if(PetscUseThreadPool) { 1521 pVal = (int*)malloc(N*sizeof(int)); 1522 /* allocate memory in the heap for the thread structure */ 1523 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1524 BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! 
*/ 1525 job_true.pdata = (void**)malloc(N*sizeof(void*)); 1526 for(i=0; i<N; i++) { 1527 pVal[i] = i; 1528 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1529 /* error check to ensure proper thread creation */ 1530 status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1); 1531 /* should check error */ 1532 } 1533 } 1534 else { 1535 } 1536 return NULL; 1537 } 1538 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "PetscThreadFinalize_True" 1542 PetscErrorCode PetscThreadFinalize_True() { 1543 int i,ierr; 1544 void* jstatus; 1545 1546 PetscFunctionBegin; 1547 1548 if(PetscUseThreadPool) { 1549 MainJob(FuncFinish,NULL,PetscMaxThreads); /* set up job and broadcast work */ 1550 /* join the threads */ 1551 for(i=0; i<PetscMaxThreads; i++) { 1552 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1553 /* should check error */ 1554 } 1555 free(BarrPoint); 1556 free(PetscThreadPoint); 1557 } 1558 else { 1559 } 1560 PetscFunctionReturn(0); 1561 } 1562 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MainWait_True" 1565 void MainWait_True() { 1566 int ierr; 1567 while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) { 1568 ierr = pthread_cond_wait(&main_cond,&job_true.mutex); 1569 } 1570 ierr = pthread_mutex_unlock(&job_true.mutex); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MainJob_True" 1575 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) { 1576 int ierr; 1577 PetscErrorCode ijoberr = 0; 1578 if(PetscUseThreadPool) { 1579 MainWait(); 1580 job_true.pfunc = pFunc; 1581 job_true.pdata = data; 1582 job_true.pbarr = &BarrPoint[n]; 1583 job_true.iNumJobThreads = n; 1584 job_true.startJob = PETSC_TRUE; 1585 ierr = pthread_cond_broadcast(&job_true.cond); 1586 if(pFunc!=FuncFinish) { 1587 MainWait(); /* why wait after? 
guarantees that job gets done */ 1588 } 1589 } 1590 else { 1591 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1592 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1593 PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */ 1594 free(apThread); 1595 } 1596 if(ithreaderr) { 1597 ijoberr = ithreaderr; 1598 } 1599 return ijoberr; 1600 } 1601 /**** NO THREAD POOL FUNCTION ****/ 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MainJob_Spawn" 1604 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) { 1605 PetscErrorCode ijoberr = 0; 1606 1607 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1608 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1609 PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */ 1610 free(apThread); 1611 1612 return ijoberr; 1613 } 1614 /**** ****/ 1615 #endif 1616 1617 void* FuncFinish(void* arg) { 1618 PetscThreadGo = PETSC_FALSE; 1619 return(0); 1620 } 1621 1622 #endif 1623