// Implements the M-ary tree (and chain/main/true) thread pools used during PETSc startup.
/*

   This file defines part of the initialization of PETSc

   This file uses regular malloc and free because it cannot know
   what malloc is being used until it has already processed the input.
*/

#define _GNU_SOURCE
#include <sched.h>
#include <petscsys.h>        /*I  "petscsys.h"   I*/
#include <pthread.h>
#include <sys/sysinfo.h>
#include <unistd.h>
#if defined(PETSC_HAVE_STDLIB_H)
#include <stdlib.h>
#endif
#if defined(PETSC_HAVE_MALLOC_H)
#include <malloc.h>
#endif
#if defined(PETSC_HAVE_VALGRIND)
#include <valgrind/valgrind.h>
#endif

/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool   PetscBeganMPI         = PETSC_FALSE;
PetscBool   PetscInitializeCalled = PETSC_FALSE;
PetscBool   PetscFinalizeCalled   = PETSC_FALSE;
PetscBool   PetscUseThreadPool    = PETSC_FALSE;  /* set when -use_thread_pool is given */
PetscBool   PetscThreadGo         = PETSC_TRUE;   /* workers loop while this is true */
PetscMPIInt PetscGlobalRank       = -1;
PetscMPIInt PetscGlobalSize       = -1;
PetscMPIInt PetscMaxThreads       = 2;            /* overridden by -thread_max */
pthread_t*         PetscThreadPoint;
pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
PetscErrorCode     ithreaderr = 0;  /* sticky flag: nonzero once any worker's job function fails */
int*               pVal;

#define CACHE_LINE_SIZE 64   /* used by 'chain','main','tree' thread pools */
int* ThreadCoreAffinity;     /* core id each worker thread is pinned to (see -thread<i> options) */

/* job status, used by the 'chain' and 'tree' thread pools */
typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;

/* shared state between 'main' and the workers of the 'tree' pool */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;       /* worker -> boss: "I am ready" */
  pthread_cond_t**  cond2array;       /* boss -> worker: "start working" */
  void* (*pfunc)(void*);              /* job function */
  void** pdata;                       /* per-thread job data (may be NULL) */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;

/* shared state for the 'main' pool (no start/status tracking) */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;

/* shared state for the 'chain' pool (same layout as the tree pool) */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;

/* shared state for the 'true' pool: a single mutex/cond plus a barrier */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};

pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER;  /* used by 'true','chain','tree' thread pools */
/* cache-line-sized backing storage for the per-thread sync objects; used by 'chain','main','tree' pools */
char* arrmutex;
char* arrcond1;
char* arrcond2;
char* arrstart;
char* arrready;

/* Function Pointers: selected at startup according to the -pool option */
void*          (*PetscThreadFunc)(void*) = NULL;
void*          (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void           (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void* PetscThreadFunc_Tree(void*);
void* PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void* PetscThreadFunc_Main(void*);
void* PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void* PetscThreadFunc_Chain(void*);
void* PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void* PetscThreadFunc_True(void*);
void* PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** ****/

void* FuncFinish(void*);
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);

#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
MPI_Datatype MPI_C_DOUBLE_COMPLEX;
MPI_Datatype MPI_C_COMPLEX;
#endif
PetscScalar PETSC_i;
#else
PetscScalar PETSC_i = 0.0;
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype MPIU___FLOAT128 = 0;
#endif
MPI_Datatype MPIU_2SCALAR = 0;
MPI_Datatype MPIU_2INT = 0;

/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
char     _BT_mask = ' ';
char     _BT_c = ' ';
PetscInt _BT_idx = 0;

/*
     Function that is called to display all error messages
*/
PetscErrorCode (*PetscErrorPrintf)(const char [],...) = PetscErrorPrintfDefault;
PetscErrorCode (*PetscHelpPrintf)(MPI_Comm,const char [],...)
= PetscHelpPrintfDefault; 168 #if defined(PETSC_HAVE_MATLAB_ENGINE) 169 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintf_Matlab; 170 #else 171 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintfDefault; 172 #endif 173 /* 174 This is needed to turn on/off cusp synchronization */ 175 PetscBool synchronizeCUSP = PETSC_FALSE; 176 177 /* ------------------------------------------------------------------------------*/ 178 /* 179 Optional file where all PETSc output from various prints is saved 180 */ 181 FILE *petsc_history = PETSC_NULL; 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "PetscOpenHistoryFile" 185 PetscErrorCode PetscOpenHistoryFile(const char filename[],FILE **fd) 186 { 187 PetscErrorCode ierr; 188 PetscMPIInt rank,size; 189 char pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64]; 190 char version[256]; 191 192 PetscFunctionBegin; 193 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 194 if (!rank) { 195 char arch[10]; 196 int err; 197 PetscViewer viewer; 198 199 ierr = PetscGetArchType(arch,10);CHKERRQ(ierr); 200 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 201 ierr = PetscGetVersion(version,256);CHKERRQ(ierr); 202 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 203 if (filename) { 204 ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr); 205 } else { 206 ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr); 207 ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr); 208 ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr); 209 } 210 211 *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname); 212 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 213 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr); 214 ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr); 215 ierr = 
PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr); 216 ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr); 217 ierr = PetscOptionsView(viewer);CHKERRQ(ierr); 218 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 219 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 220 err = fflush(*fd); 221 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 222 } 223 PetscFunctionReturn(0); 224 } 225 226 #undef __FUNCT__ 227 #define __FUNCT__ "PetscCloseHistoryFile" 228 PetscErrorCode PetscCloseHistoryFile(FILE **fd) 229 { 230 PetscErrorCode ierr; 231 PetscMPIInt rank; 232 char date[64]; 233 int err; 234 235 PetscFunctionBegin; 236 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 237 if (!rank) { 238 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 239 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 240 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr); 241 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 242 err = fflush(*fd); 243 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 244 err = fclose(*fd); 245 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file"); 246 } 247 PetscFunctionReturn(0); 248 } 249 250 /* ------------------------------------------------------------------------------*/ 251 252 /* 253 This is ugly and probably belongs somewhere else, but I want to 254 be able to put a true MPI abort error handler with command line args. 255 256 This is so MPI errors in the debugger will leave all the stack 257 frames. The default MP_Abort() cleans up and exits thus providing no useful information 258 in the debugger hence we call abort() instead of MPI_Abort(). 
*/

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/* MPI error handler: print the MPI error code and abort() so the debugger keeps all stack frames */
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();
}

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/* MPI error handler: try to attach the debugger; fall back to MPI_Abort() if that fails */
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}

#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode PetscEnd(void)
{
  PetscFunctionBegin;
  PetscFinalize();
  exit(0);
  return 0;  /* never reached; keeps compilers quiet about the return type */
}

PetscBool PetscOptionsPublish = PETSC_FALSE;
extern PetscErrorCode PetscSetUseTrMalloc_Private(void);
extern PetscBool petscsetmallocvisited;
static char emacsmachinename[256];  /* host used by the -on_error_emacs error handler */

/* hooks for a "higher-level" package to print its own version/help text first */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm) = 0;

#undef __FUNCT__
#define __FUNCT__ "PetscSetHelpVersionFunctions"
/*@C
   PetscSetHelpVersionFunctions - Sets functions that print help and version information
   before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
   This routine enables a "higher-level" package that uses PETSc to print its messages first.
323 324 Input Parameter: 325 + help - the help function (may be PETSC_NULL) 326 - version - the version function (may be PETSC_NULL) 327 328 Level: developer 329 330 Concepts: package help message 331 332 @*/ 333 PetscErrorCode PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm)) 334 { 335 PetscFunctionBegin; 336 PetscExternalHelpFunction = help; 337 PetscExternalVersionFunction = version; 338 PetscFunctionReturn(0); 339 } 340 341 #undef __FUNCT__ 342 #define __FUNCT__ "PetscOptionsCheckInitial_Private" 343 PetscErrorCode PetscOptionsCheckInitial_Private(void) 344 { 345 char string[64],mname[PETSC_MAX_PATH_LEN],*f; 346 MPI_Comm comm = PETSC_COMM_WORLD; 347 PetscBool flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout; 348 PetscErrorCode ierr; 349 PetscReal si; 350 int i; 351 PetscMPIInt rank; 352 char version[256]; 353 354 PetscFunctionBegin; 355 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 356 357 /* 358 Setup the memory management; support for tracing malloc() usage 359 */ 360 ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr); 361 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD) 362 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr); 363 if ((!flg2 || flg1) && !petscsetmallocvisited) { 364 #if defined(PETSC_HAVE_VALGRIND) 365 if (flg2 || !(RUNNING_ON_VALGRIND)) { 366 /* turn off default -malloc if valgrind is being used */ 367 #endif 368 ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr); 369 #if defined(PETSC_HAVE_VALGRIND) 370 } 371 #endif 372 } 373 #else 374 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr); 375 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr); 376 if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);} 377 #endif 378 if (flg3) { 379 ierr = PetscMallocSetDumpLog();CHKERRQ(ierr); 380 } 381 flg1 = 
PETSC_FALSE; 382 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr); 383 if (flg1) { 384 ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr); 385 ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr); 386 } 387 388 flg1 = PETSC_FALSE; 389 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr); 390 if (!flg1) { 391 flg1 = PETSC_FALSE; 392 ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr); 393 } 394 if (flg1) { 395 ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr); 396 } 397 398 /* 399 Set the display variable for graphics 400 */ 401 ierr = PetscSetDisplay();CHKERRQ(ierr); 402 403 /* 404 Determine whether user specified maximum number of threads 405 */ 406 ierr = PetscOptionsHasName(PETSC_NULL,"-thread_max",&flg1);CHKERRQ(ierr); 407 if(flg1) { 408 ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr); 409 } 410 411 /* 412 Determine whether to use thread pool 413 */ 414 ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr); 415 if(flg1) { 416 PetscUseThreadPool = PETSC_TRUE; 417 PetscInt N_CORES = get_nprocs(); 418 ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int)); 419 char tstr[9]; 420 char tbuf[2]; 421 strcpy(tstr,"-thread"); 422 for(i=0;i<PetscMaxThreads;i++) { 423 ThreadCoreAffinity[i] = i; //default 424 sprintf(tbuf,"%d",i); 425 strcat(tstr,tbuf); 426 ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr); 427 if(flg1) { 428 ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr); 429 ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; //check on the user 430 } 431 tstr[7] = '\0'; 432 } 433 //get the thread pool type 434 PetscInt ipool = 0; 435 ierr = PetscOptionsGetInt(PETSC_NULL,"-pool",&ipool,PETSC_NULL);CHKERRQ(ierr); 436 switch(ipool) { 437 case 1: 438 PetscThreadFunc = &PetscThreadFunc_Tree; 439 PetscThreadInitialize = &PetscThreadInitialize_Tree; 
440 PetscThreadFinalize = &PetscThreadFinalize_Tree; 441 MainWait = &MainWait_Tree; 442 MainJob = &MainJob_Tree; 443 break; 444 case 2: 445 PetscThreadFunc = &PetscThreadFunc_Main; 446 PetscThreadInitialize = &PetscThreadInitialize_Main; 447 PetscThreadFinalize = &PetscThreadFinalize_Main; 448 MainWait = &MainWait_Main; 449 MainJob = &MainJob_Main; 450 break; 451 case 3: 452 PetscThreadFunc = &PetscThreadFunc_Chain; 453 PetscThreadInitialize = &PetscThreadInitialize_Chain; 454 PetscThreadFinalize = &PetscThreadFinalize_Chain; 455 MainWait = &MainWait_Chain; 456 MainJob = &MainJob_Chain; 457 break; 458 default: 459 PetscThreadFunc = &PetscThreadFunc_True; 460 PetscThreadInitialize = &PetscThreadInitialize_True; 461 PetscThreadFinalize = &PetscThreadFinalize_True; 462 MainWait = &MainWait_True; 463 MainJob = &MainJob_True; 464 break; 465 } 466 } 467 PetscThreadInitialize(PetscMaxThreads); 468 /* 469 Print the PETSc version information 470 */ 471 ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr); 472 ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr); 473 ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr); 474 if (flg1 || flg2 || flg3){ 475 476 /* 477 Print "higher-level" package version message 478 */ 479 if (PetscExternalVersionFunction) { 480 ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr); 481 } 482 483 ierr = PetscGetVersion(version,256);CHKERRQ(ierr); 484 ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\ 485 ------------------------------\n");CHKERRQ(ierr); 486 ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr); 487 ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr); 488 ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr); 489 ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr); 490 ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. 
\n");CHKERRQ(ierr); 491 ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr); 492 ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\ 493 ------------------------------\n");CHKERRQ(ierr); 494 } 495 496 /* 497 Print "higher-level" package help message 498 */ 499 if (flg3){ 500 if (PetscExternalHelpFunction) { 501 ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr); 502 } 503 } 504 505 /* 506 Setup the error handling 507 */ 508 flg1 = PETSC_FALSE; 509 ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr); 510 if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);} 511 flg1 = PETSC_FALSE; 512 ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr); 513 if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);} 514 flg1 = PETSC_FALSE; 515 ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr); 516 if (flg1) { 517 ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr); 518 } 519 flg1 = PETSC_FALSE; 520 ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr); 521 if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);} 522 flg1 = PETSC_FALSE; 523 ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr); 524 if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);} 525 526 /* 527 Setup debugger information 528 */ 529 ierr = PetscSetDefaultDebugger();CHKERRQ(ierr); 530 ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr); 531 if (flg1) { 532 MPI_Errhandler err_handler; 533 534 ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr); 535 ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr); 536 ierr = 
MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr); 537 ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr); 538 } 539 ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr); 540 if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); } 541 ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr); 542 ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr); 543 if (flg1 || flg2) { 544 PetscMPIInt size; 545 PetscInt lsize,*nodes; 546 MPI_Errhandler err_handler; 547 /* 548 we have to make sure that all processors have opened 549 connections to all other processors, otherwise once the 550 debugger has stated it is likely to receive a SIGUSR1 551 and kill the program. 552 */ 553 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 554 if (size > 2) { 555 PetscMPIInt dummy = 0; 556 MPI_Status status; 557 for (i=0; i<size; i++) { 558 if (rank != i) { 559 ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr); 560 } 561 } 562 for (i=0; i<size; i++) { 563 if (rank != i) { 564 ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr); 565 } 566 } 567 } 568 /* check if this processor node should be in debugger */ 569 ierr = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr); 570 lsize = size; 571 ierr = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr); 572 if (flag) { 573 for (i=0; i<lsize; i++) { 574 if (nodes[i] == rank) { flag = PETSC_FALSE; break; } 575 } 576 } 577 if (!flag) { 578 ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr); 579 ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr); 580 if (flg1) { 581 ierr = PetscAttachDebugger();CHKERRQ(ierr); 582 } else { 583 ierr = PetscStopForDebugger();CHKERRQ(ierr); 584 } 585 ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr); 586 
ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr); 587 } 588 ierr = PetscFree(nodes);CHKERRQ(ierr); 589 } 590 591 ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr); 592 if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);} 593 594 #if defined(PETSC_USE_SOCKET_VIEWER) 595 /* 596 Activates new sockets for zope if needed 597 */ 598 ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr); 599 ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr); 600 if (flgz){ 601 int sockfd; 602 char hostname[256]; 603 char username[256]; 604 int remoteport = 9999; 605 606 ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr); 607 if (!hostname[0]){ 608 ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr); 609 } 610 ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr); 611 ierr = PetscGetUserName(username, 256);CHKERRQ(ierr); 612 PETSC_ZOPEFD = fdopen(sockfd, "w"); 613 if (flgzout){ 614 PETSC_STDOUT = PETSC_ZOPEFD; 615 fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username); 616 fprintf(PETSC_STDOUT, "<<<start>>>"); 617 } else { 618 fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username); 619 fprintf(PETSC_ZOPEFD, "<<<start>>>"); 620 } 621 } 622 #endif 623 #if defined(PETSC_USE_SERVER) 624 ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr); 625 if (flgz){ 626 PetscInt port = PETSC_DECIDE; 627 ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr); 628 ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr); 629 } 630 #endif 631 632 /* 633 Setup profiling and logging 634 */ 635 #if defined (PETSC_USE_INFO) 636 { 637 char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0; 638 ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr); 639 if (flg1 && logname[0]) { 640 ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr); 641 } else if 
(flg1) { 642 ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr); 643 } 644 } 645 #endif 646 #if defined(PETSC_USE_LOG) 647 mname[0] = 0; 648 ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); 649 if (flg1) { 650 if (mname[0]) { 651 ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr); 652 } else { 653 ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr); 654 } 655 } 656 #if defined(PETSC_HAVE_MPE) 657 flg1 = PETSC_FALSE; 658 ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr); 659 if (flg1) PetscLogMPEBegin(); 660 #endif 661 flg1 = PETSC_FALSE; 662 flg2 = PETSC_FALSE; 663 flg3 = PETSC_FALSE; 664 ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr); 665 ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr); 666 ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr); 667 ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr); 668 if (flg1) { ierr = PetscLogAllBegin();CHKERRQ(ierr); } 669 else if (flg2 || flg3 || flg4) { ierr = PetscLogBegin();CHKERRQ(ierr);} 670 671 ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr); 672 if (flg1) { 673 char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN]; 674 FILE *file; 675 if (mname[0]) { 676 sprintf(name,"%s.%d",mname,rank); 677 ierr = PetscFixFilename(name,fname);CHKERRQ(ierr); 678 file = fopen(fname,"w"); 679 if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname); 680 } else { 681 file = PETSC_STDOUT; 682 } 683 ierr = PetscLogTraceBegin(file);CHKERRQ(ierr); 684 } 685 #endif 686 687 /* 688 Setup building of stack frames for all function calls 689 */ 690 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD) 691 ierr = PetscStackCreate();CHKERRQ(ierr); 692 #endif 693 694 ierr = 
PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr); 695 696 /* 697 Print basic help message 698 */ 699 ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr); 700 if (flg1) { 701 ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr); 702 ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr); 703 ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr); 704 ierr = (*PetscHelpPrintf)(comm," only when run in the debugger\n");CHKERRQ(ierr); 705 ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr); 706 ierr = (*PetscHelpPrintf)(comm," start the debugger in new xterm\n");CHKERRQ(ierr); 707 ierr = (*PetscHelpPrintf)(comm," unless noxterm is given\n");CHKERRQ(ierr); 708 ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr); 709 ierr = (*PetscHelpPrintf)(comm," start all processes in the debugger\n");CHKERRQ(ierr); 710 ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr); 711 ierr = (*PetscHelpPrintf)(comm," emacs jumps to error file\n");CHKERRQ(ierr); 712 ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] 
Nodes to start in debugger\n");CHKERRQ(ierr); 713 ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr); 714 ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr); 715 ierr = (*PetscHelpPrintf)(comm," waits the delay for you to attach\n");CHKERRQ(ierr); 716 ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr); 717 ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr); 718 ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr); 719 ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr); 720 ierr = (*PetscHelpPrintf)(comm," note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr); 721 ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr); 722 ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr); 723 ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr); 724 ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr); 725 ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr); 726 ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr); 727 ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr); 728 ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr); 729 ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr); 730 ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr); 731 
ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr); 732 ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr); 733 ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr); 734 #if defined(PETSC_USE_LOG) 735 ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr); 736 ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr); 737 ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr); 738 #if defined(PETSC_HAVE_MPE) 739 ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr); 740 #endif 741 ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr); 742 #endif 743 ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr); 744 ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr); 745 ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr); 746 ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr); 747 } 748 749 ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr); 750 if (flg1) { 751 ierr = PetscSleep(si);CHKERRQ(ierr); 752 } 753 754 ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); 755 ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr); 756 if (f) { 757 ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr); 758 } 759 760 #if defined(PETSC_HAVE_CUSP) 761 ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr); 762 if (flg3) flg1 = PETSC_TRUE; 763 else flg1 = PETSC_FALSE; 764 ierr = 
PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr); 765 if (flg1) synchronizeCUSP = PETSC_TRUE; 766 #endif 767 768 PetscFunctionReturn(0); 769 } 770 771 /**** 'Tree' Thread Pool Functions ****/ 772 void* PetscThreadFunc_Tree(void* arg) { 773 PetscErrorCode iterr; 774 int icorr,ierr; 775 int* pId = (int*)arg; 776 int ThreadId = *pId,Mary = 2,i,SubWorker; 777 PetscBool PeeOn; 778 cpu_set_t mset; 779 780 icorr = ThreadCoreAffinity[ThreadId]; 781 CPU_ZERO(&mset); 782 CPU_SET(icorr,&mset); 783 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 784 785 if((Mary*ThreadId+1)>(PetscMaxThreads-1)) { 786 PeeOn = PETSC_TRUE; 787 } 788 else { 789 PeeOn = PETSC_FALSE; 790 } 791 if(PeeOn==PETSC_FALSE) { 792 //check your subordinates, wait for them to be ready 793 for(i=1;i<=Mary;i++) { 794 SubWorker = Mary*ThreadId+i; 795 if(SubWorker<PetscMaxThreads) { 796 ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]); 797 while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) { 798 //upon entry, automically releases the lock and blocks 799 //upon return, has the lock 800 ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 801 } 802 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 803 } 804 } 805 //your subordinates are now ready 806 } 807 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 808 //update your ready status 809 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 810 if(ThreadId==0) { 811 job_tree.eJobStat = JobCompleted; 812 //signal main 813 ierr = pthread_cond_signal(&main_cond); 814 } 815 else { 816 //tell your boss that you're ready to work 817 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 818 } 819 //the while loop needs to have an exit 820 //the 'main' thread can terminate all the threads by performing a broadcast 821 //and calling FuncFinish 822 while(PetscThreadGo) { 823 //need to check the condition to ensure we don't have to wait 824 //waiting when you don't have 
to causes problems 825 //also need to check the condition to ensure proper handling of spurious wakeups 826 while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) { 827 //upon entry, automically releases the lock and blocks 828 //upon return, has the lock 829 ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]); 830 *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE; 831 *(job_tree.arrThreadReady[ThreadId]) = PETSC_FALSE; 832 } 833 if(ThreadId==0) { 834 job_tree.startJob = PETSC_FALSE; 835 job_tree.eJobStat = ThreadsWorking; 836 } 837 ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]); 838 if(PeeOn==PETSC_FALSE) { 839 //tell your subordinates it's time to get to work 840 for(i=1; i<=Mary; i++) { 841 SubWorker = Mary*ThreadId+i; 842 if(SubWorker<PetscMaxThreads) { 843 ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]); 844 } 845 } 846 } 847 //do your job 848 if(job_tree.pdata==NULL) { 849 iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata); 850 } 851 else { 852 iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]); 853 } 854 if(iterr!=0) { 855 ithreaderr = 1; 856 } 857 if(PetscThreadGo) { 858 //reset job, get ready for more 859 if(PeeOn==PETSC_FALSE) { 860 //check your subordinates, waiting for them to be ready 861 //how do you know for a fact that a given subordinate has actually started? 
862 for(i=1;i<=Mary;i++) { 863 SubWorker = Mary*ThreadId+i; 864 if(SubWorker<PetscMaxThreads) { 865 ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]); 866 while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) { 867 //upon entry, automically releases the lock and blocks 868 //upon return, has the lock 869 ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 870 } 871 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 872 } 873 } 874 //your subordinates are now ready 875 } 876 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 877 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 878 if(ThreadId==0) { 879 job_tree.eJobStat = JobCompleted; //root thread: last thread to complete, guaranteed! 880 //root thread signals 'main' 881 ierr = pthread_cond_signal(&main_cond); 882 } 883 else { 884 //signal your boss before you go to sleep 885 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 886 } 887 } 888 } 889 return NULL; 890 } 891 892 #undef __FUNCT__ 893 #define __FUNCT__ "PetscThreadInitialize_Tree" 894 void* PetscThreadInitialize_Tree(PetscInt N) { 895 PetscInt i,ierr; 896 int status; 897 898 if(PetscUseThreadPool) { 899 size_t Val1 = (size_t)CACHE_LINE_SIZE; 900 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 901 arrmutex = (char*)memalign(Val1,Val2); 902 arrcond1 = (char*)memalign(Val1,Val2); 903 arrcond2 = (char*)memalign(Val1,Val2); 904 arrstart = (char*)memalign(Val1,Val2); 905 arrready = (char*)memalign(Val1,Val2); 906 job_tree.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 907 job_tree.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 908 job_tree.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 909 job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 910 job_tree.arrThreadReady = 
(PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 911 //initialize job structure 912 for(i=0; i<PetscMaxThreads; i++) { 913 job_tree.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 914 job_tree.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 915 job_tree.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 916 job_tree.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i); 917 job_tree.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 918 } 919 for(i=0; i<PetscMaxThreads; i++) { 920 ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL); 921 ierr = pthread_cond_init(job_tree.cond1array[i],NULL); 922 ierr = pthread_cond_init(job_tree.cond2array[i],NULL); 923 *(job_tree.arrThreadStarted[i]) = PETSC_FALSE; 924 *(job_tree.arrThreadReady[i]) = PETSC_FALSE; 925 } 926 job_tree.pfunc = NULL; 927 job_tree.pdata = (void**)malloc(N*sizeof(void*)); 928 job_tree.startJob = PETSC_FALSE; 929 job_tree.eJobStat = JobInitiated; 930 pVal = (int*)malloc(N*sizeof(int)); 931 //allocate memory in the heap for the thread structure 932 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 933 //create threads 934 for(i=0; i<N; i++) { 935 pVal[i] = i; 936 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 937 //error check 938 } 939 } 940 else { 941 //do nothing 942 } 943 return NULL; 944 } 945 946 #undef __FUNCT__ 947 #define __FUNCT__ "PetscThreadFinalize_Tree" 948 PetscErrorCode PetscThreadFinalize_Tree() { 949 int i,ierr; 950 void* jstatus; 951 952 PetscFunctionBegin; 953 954 if(PetscUseThreadPool) { 955 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 956 //join the threads 957 for(i=0; i<PetscMaxThreads; i++) { 958 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 959 //do error checking 960 } 961 free(PetscThreadPoint); 962 free(arrmutex); 963 free(arrcond1); 964 free(arrcond2); 965 free(arrstart); 966 free(arrready); 967 free(job_tree.pdata); 968 free(pVal); 
969 } 970 else { 971 } 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MainWait_Tree" 977 void MainWait_Tree() { 978 int ierr; 979 ierr = pthread_mutex_lock(job_tree.mutexarray[0]); 980 while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) { 981 ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]); 982 } 983 ierr = pthread_mutex_unlock(job_tree.mutexarray[0]); 984 } 985 986 #undef __FUNCT__ 987 #define __FUNCT__ "MainJob_Tree" 988 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) { 989 int i,ierr; 990 PetscErrorCode ijoberr = 0; 991 if(PetscUseThreadPool) { 992 MainWait(); 993 job_tree.pfunc = pFunc; 994 job_tree.pdata = data; 995 job_tree.startJob = PETSC_TRUE; 996 for(i=0; i<PetscMaxThreads; i++) { 997 *(job_tree.arrThreadStarted[i]) = PETSC_FALSE; 998 } 999 job_tree.eJobStat = JobInitiated; 1000 ierr = pthread_cond_signal(job_tree.cond2array[0]); 1001 if(pFunc!=FuncFinish) { 1002 MainWait(); //why wait after? 
guarantees that job gets done before proceeding with result collection (if any) 1003 } 1004 } 1005 else { 1006 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1007 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1008 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1009 free(apThread); 1010 } 1011 if(ithreaderr) { 1012 ijoberr = ithreaderr; 1013 } 1014 return ijoberr; 1015 } 1016 /**** ****/ 1017 1018 /**** 'Main' Thread Pool Functions ****/ 1019 void* PetscThreadFunc_Main(void* arg) { 1020 PetscErrorCode iterr; 1021 int icorr,ierr; 1022 int* pId = (int*)arg; 1023 int ThreadId = *pId; 1024 cpu_set_t mset; 1025 1026 icorr = ThreadCoreAffinity[ThreadId]; 1027 CPU_ZERO(&mset); 1028 CPU_SET(icorr,&mset); 1029 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1030 1031 ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]); 1032 //update your ready status 1033 *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE; 1034 //tell the BOSS that you're ready to work before you go to sleep 1035 ierr = pthread_cond_signal(job_main.cond1array[ThreadId]); 1036 1037 //the while loop needs to have an exit 1038 //the 'main' thread can terminate all the threads by performing a broadcast 1039 //and calling FuncFinish 1040 while(PetscThreadGo) { 1041 //need to check the condition to ensure we don't have to wait 1042 //waiting when you don't have to causes problems 1043 //also need to check the condition to ensure proper handling of spurious wakeups 1044 while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) { 1045 //upon entry, atomically releases the lock and blocks 1046 //upon return, has the lock 1047 ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]); 1048 //*(job_main.arrThreadReady[ThreadId]) = PETSC_FALSE; 1049 } 1050 ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]); 1051 //do your job 1052 if(job_main.pdata==NULL) { 1053 iterr = (PetscErrorCode)(long 
int)job_main.pfunc(job_main.pdata); 1054 } 1055 else { 1056 iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]); 1057 } 1058 if(iterr!=0) { 1059 ithreaderr = 1; 1060 } 1061 if(PetscThreadGo) { 1062 //reset job, get ready for more 1063 ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]); 1064 *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE; 1065 //tell the BOSS that you're ready to work before you go to sleep 1066 ierr = pthread_cond_signal(job_main.cond1array[ThreadId]); 1067 } 1068 } 1069 return NULL; 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "PetscThreadInitialize_Main" 1074 void* PetscThreadInitialize_Main(PetscInt N) { 1075 PetscInt i,ierr; 1076 int status; 1077 1078 if(PetscUseThreadPool) { 1079 size_t Val1 = (size_t)CACHE_LINE_SIZE; 1080 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 1081 arrmutex = (char*)memalign(Val1,Val2); 1082 arrcond1 = (char*)memalign(Val1,Val2); 1083 arrcond2 = (char*)memalign(Val1,Val2); 1084 arrstart = (char*)memalign(Val1,Val2); 1085 arrready = (char*)memalign(Val1,Val2); 1086 job_main.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 1087 job_main.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1088 job_main.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1089 job_main.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1090 //initialize job structure 1091 for(i=0; i<PetscMaxThreads; i++) { 1092 job_main.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 1093 job_main.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 1094 job_main.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 1095 job_main.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 1096 } 1097 for(i=0; i<PetscMaxThreads; i++) { 1098 ierr = pthread_mutex_init(job_main.mutexarray[i],NULL); 1099 ierr = pthread_cond_init(job_main.cond1array[i],NULL); 1100 
ierr = pthread_cond_init(job_main.cond2array[i],NULL); 1101 *(job_main.arrThreadReady[i]) = PETSC_FALSE; 1102 } 1103 job_main.pfunc = NULL; 1104 job_main.pdata = (void**)malloc(N*sizeof(void*)); 1105 pVal = (int*)malloc(N*sizeof(int)); 1106 //allocate memory in the heap for the thread structure 1107 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1108 //create threads 1109 for(i=0; i<N; i++) { 1110 pVal[i] = i; 1111 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1112 //error check 1113 } 1114 } 1115 else { 1116 } 1117 return NULL; 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "PetscThreadFinalize_Main" 1122 PetscErrorCode PetscThreadFinalize_Main() { 1123 int i,ierr; 1124 void* jstatus; 1125 1126 PetscFunctionBegin; 1127 1128 if(PetscUseThreadPool) { 1129 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 1130 //join the threads 1131 for(i=0; i<PetscMaxThreads; i++) { 1132 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1133 //do error checking 1134 } 1135 free(PetscThreadPoint); 1136 free(arrmutex); 1137 free(arrcond1); 1138 free(arrcond2); 1139 free(arrstart); 1140 free(arrready); 1141 free(job_main.pdata); 1142 free(pVal); 1143 } 1144 else { 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MainWait_Main" 1151 void MainWait_Main() { 1152 int i,ierr; 1153 for(i=0; i<PetscMaxThreads; i++) { 1154 ierr = pthread_mutex_lock(job_main.mutexarray[i]); 1155 while(*(job_main.arrThreadReady[i])==PETSC_FALSE) { 1156 ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]); 1157 } 1158 ierr = pthread_mutex_unlock(job_main.mutexarray[i]); 1159 } 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MainJob_Main" 1164 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) { 1165 int i,ierr; 1166 PetscErrorCode ijoberr = 0; 1167 if(PetscUseThreadPool) { 1168 MainWait(); //you know everyone is waiting to be signalled! 
1169 job_main.pfunc = pFunc; 1170 job_main.pdata = data; 1171 for(i=0; i<PetscMaxThreads; i++) { 1172 *(job_main.arrThreadReady[i]) = PETSC_FALSE; //why do this? suppose you get into MainWait first 1173 } 1174 //tell the threads to go to work 1175 for(i=0; i<PetscMaxThreads; i++) { 1176 ierr = pthread_cond_signal(job_main.cond2array[i]); 1177 } 1178 if(pFunc!=FuncFinish) { 1179 MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any) 1180 } 1181 } 1182 else { 1183 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1184 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1185 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1186 free(apThread); 1187 } 1188 if(ithreaderr) { 1189 ijoberr = ithreaderr; 1190 } 1191 return ijoberr; 1192 } 1193 /**** ****/ 1194 1195 /**** Chain Thread Functions ****/ 1196 void* PetscThreadFunc_Chain(void* arg) { 1197 PetscErrorCode iterr; 1198 int icorr,ierr; 1199 int* pId = (int*)arg; 1200 int ThreadId = *pId; 1201 int SubWorker = ThreadId + 1; 1202 PetscBool PeeOn; 1203 cpu_set_t mset; 1204 1205 icorr = ThreadCoreAffinity[ThreadId]; 1206 CPU_ZERO(&mset); 1207 CPU_SET(icorr,&mset); 1208 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1209 1210 if(ThreadId==(PetscMaxThreads-1)) { 1211 PeeOn = PETSC_TRUE; 1212 } 1213 else { 1214 PeeOn = PETSC_FALSE; 1215 } 1216 if(PeeOn==PETSC_FALSE) { 1217 //check your subordinate, wait for him to be ready 1218 ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]); 1219 while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) { 1220 //upon entry, automically releases the lock and blocks 1221 //upon return, has the lock 1222 ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]); 1223 } 1224 ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]); 1225 //your subordinate is now ready 1226 } 1227 ierr = 
pthread_mutex_lock(job_chain.mutexarray[ThreadId]); 1228 //update your ready status 1229 *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE; 1230 if(ThreadId==0) { 1231 job_chain.eJobStat = JobCompleted; 1232 //signal main 1233 ierr = pthread_cond_signal(&main_cond); 1234 } 1235 else { 1236 //tell your boss that you're ready to work 1237 ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]); 1238 } 1239 //the while loop needs to have an exit 1240 //the 'main' thread can terminate all the threads by performing a broadcast 1241 //and calling FuncFinish 1242 while(PetscThreadGo) { 1243 //need to check the condition to ensure we don't have to wait 1244 //waiting when you don't have to causes problems 1245 //also need to check the condition to ensure proper handling of spurious wakeups 1246 while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) { 1247 //upon entry, automically releases the lock and blocks 1248 //upon return, has the lock 1249 ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]); 1250 *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE; 1251 *(job_chain.arrThreadReady[ThreadId]) = PETSC_FALSE; 1252 } 1253 if(ThreadId==0) { 1254 job_chain.startJob = PETSC_FALSE; 1255 job_chain.eJobStat = ThreadsWorking; 1256 } 1257 ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]); 1258 if(PeeOn==PETSC_FALSE) { 1259 //tell your subworker it's time to get to work 1260 ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]); 1261 } 1262 //do your job 1263 if(job_chain.pdata==NULL) { 1264 iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata); 1265 } 1266 else { 1267 iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]); 1268 } 1269 if(iterr!=0) { 1270 ithreaderr = 1; 1271 } 1272 if(PetscThreadGo) { 1273 //reset job, get ready for more 1274 if(PeeOn==PETSC_FALSE) { 1275 //check your subordinate, wait for him to be ready 1276 //how do you know for a fact that your subordinate has 
actually started? 1277 ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]); 1278 while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) { 1279 //upon entry, automically releases the lock and blocks 1280 //upon return, has the lock 1281 ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]); 1282 } 1283 ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]); 1284 //your subordinate is now ready 1285 } 1286 ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]); 1287 *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE; 1288 if(ThreadId==0) { 1289 job_chain.eJobStat = JobCompleted; //foreman: last thread to complete, guaranteed! 1290 //root thread (foreman) signals 'main' 1291 ierr = pthread_cond_signal(&main_cond); 1292 } 1293 else { 1294 //signal your boss before you go to sleep 1295 ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]); 1296 } 1297 } 1298 } 1299 return NULL; 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "PetscThreadInitialize_Chain" 1304 void* PetscThreadInitialize_Chain(PetscInt N) { 1305 PetscInt i,ierr; 1306 int status; 1307 1308 if(PetscUseThreadPool) { 1309 size_t Val1 = (size_t)CACHE_LINE_SIZE; 1310 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 1311 arrmutex = (char*)memalign(Val1,Val2); 1312 arrcond1 = (char*)memalign(Val1,Val2); 1313 arrcond2 = (char*)memalign(Val1,Val2); 1314 arrstart = (char*)memalign(Val1,Val2); 1315 arrready = (char*)memalign(Val1,Val2); 1316 job_chain.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 1317 job_chain.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1318 job_chain.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 1319 job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1320 job_chain.arrThreadReady = 
(PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 1321 //initialize job structure 1322 for(i=0; i<PetscMaxThreads; i++) { 1323 job_chain.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 1324 job_chain.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 1325 job_chain.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 1326 job_chain.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i); 1327 job_chain.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 1328 } 1329 for(i=0; i<PetscMaxThreads; i++) { 1330 ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL); 1331 ierr = pthread_cond_init(job_chain.cond1array[i],NULL); 1332 ierr = pthread_cond_init(job_chain.cond2array[i],NULL); 1333 *(job_chain.arrThreadStarted[i]) = PETSC_FALSE; 1334 *(job_chain.arrThreadReady[i]) = PETSC_FALSE; 1335 } 1336 job_chain.pfunc = NULL; 1337 job_chain.pdata = (void**)malloc(N*sizeof(void*)); 1338 job_chain.startJob = PETSC_FALSE; 1339 job_chain.eJobStat = JobInitiated; 1340 pVal = (int*)malloc(N*sizeof(int)); 1341 //allocate memory in the heap for the thread structure 1342 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1343 //create threads 1344 for(i=0; i<N; i++) { 1345 pVal[i] = i; 1346 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1347 //error check 1348 } 1349 } 1350 else { 1351 } 1352 return NULL; 1353 } 1354 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "PetscThreadFinalize_Chain" 1358 PetscErrorCode PetscThreadFinalize_Chain() { 1359 int i,ierr; 1360 void* jstatus; 1361 1362 PetscFunctionBegin; 1363 1364 if(PetscUseThreadPool) { 1365 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 1366 //join the threads 1367 for(i=0; i<PetscMaxThreads; i++) { 1368 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1369 //do error checking 1370 } 1371 free(PetscThreadPoint); 1372 free(arrmutex); 1373 free(arrcond1); 1374 free(arrcond2); 1375 free(arrstart); 
1376 free(arrready); 1377 free(job_chain.pdata); 1378 free(pVal); 1379 } 1380 else { 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MainWait_Chain" 1387 void MainWait_Chain() { 1388 int ierr; 1389 ierr = pthread_mutex_lock(job_chain.mutexarray[0]); 1390 while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) { 1391 ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]); 1392 } 1393 ierr = pthread_mutex_unlock(job_chain.mutexarray[0]); 1394 } 1395 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MainJob_Chain" 1398 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) { 1399 int i,ierr; 1400 PetscErrorCode ijoberr = 0; 1401 if(PetscUseThreadPool) { 1402 MainWait(); 1403 job_chain.pfunc = pFunc; 1404 job_chain.pdata = data; 1405 job_chain.startJob = PETSC_TRUE; 1406 for(i=0; i<PetscMaxThreads; i++) { 1407 *(job_chain.arrThreadStarted[i]) = PETSC_FALSE; 1408 } 1409 job_chain.eJobStat = JobInitiated; 1410 ierr = pthread_cond_signal(job_chain.cond2array[0]); 1411 if(pFunc!=FuncFinish) { 1412 MainWait(); //why wait after? 
guarantees that job gets done before proceeding with result collection (if any) 1413 } 1414 } 1415 else { 1416 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1417 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1418 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1419 free(apThread); 1420 } 1421 if(ithreaderr) { 1422 ijoberr = ithreaderr; 1423 } 1424 return ijoberr; 1425 } 1426 /**** ****/ 1427 1428 /**** True Thread Functions ****/ 1429 void* PetscThreadFunc_True(void* arg) { 1430 int icorr,ierr,iVal; 1431 int* pId = (int*)arg; 1432 int ThreadId = *pId; 1433 PetscErrorCode iterr; 1434 cpu_set_t mset; 1435 1436 icorr = ThreadCoreAffinity[ThreadId]; 1437 CPU_ZERO(&mset); 1438 CPU_SET(icorr,&mset); 1439 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1440 1441 ierr = pthread_mutex_lock(&job_true.mutex); 1442 job_true.iNumReadyThreads++; 1443 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1444 ierr = pthread_cond_signal(&main_cond); 1445 } 1446 //the while loop needs to have an exit 1447 //the 'main' thread can terminate all the threads by performing a broadcast 1448 //and calling FuncFinish 1449 while(PetscThreadGo) { 1450 //need to check the condition to ensure we don't have to wait 1451 //waiting when you don't have to causes problems 1452 //also need to wait if another thread sneaks in and messes with the predicate 1453 while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) { 1454 //upon entry, automically releases the lock and blocks 1455 //upon return, has the lock 1456 ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex); 1457 } 1458 job_true.startJob = PETSC_FALSE; 1459 job_true.iNumJobThreads--; 1460 job_true.iNumReadyThreads--; 1461 iVal = PetscMaxThreads-job_true.iNumReadyThreads-1; 1462 pthread_mutex_unlock(&job_true.mutex); 1463 if(job_true.pdata==NULL) { 1464 iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata); 1465 } 1466 else { 1467 iterr = 
(PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]); 1468 } 1469 if(iterr!=0) { 1470 ithreaderr = 1; 1471 } 1472 //the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads 1473 //what happens if a thread finishes before they all start? BAD! 1474 //what happens if a thread finishes before any else start? BAD! 1475 pthread_barrier_wait(job_true.pbarr); //ensures all threads are finished 1476 //reset job 1477 if(PetscThreadGo) { 1478 pthread_mutex_lock(&job_true.mutex); 1479 job_true.iNumReadyThreads++; 1480 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1481 //signal the 'main' thread that the job is done! (only done once) 1482 ierr = pthread_cond_signal(&main_cond); 1483 } 1484 } 1485 } 1486 return NULL; 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "PetscThreadInitialize_True" 1491 void* PetscThreadInitialize_True(PetscInt N) { 1492 PetscInt i; 1493 int status; 1494 1495 if(PetscUseThreadPool) { 1496 pVal = (int*)malloc(N*sizeof(int)); 1497 //allocate memory in the heap for the thread structure 1498 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1499 BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); //BarrPoint[0] makes no sense, don't use it! 
1500 job_true.pdata = (void**)malloc(N*sizeof(void*)); 1501 for(i=0; i<N; i++) { 1502 pVal[i] = i; 1503 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1504 //error check to ensure proper thread creation 1505 status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1); 1506 //error check 1507 } 1508 } 1509 else { 1510 } 1511 return NULL; 1512 } 1513 1514 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "PetscThreadFinalize_True" 1517 PetscErrorCode PetscThreadFinalize_True() { 1518 int i,ierr; 1519 void* jstatus; 1520 1521 PetscFunctionBegin; 1522 1523 if(PetscUseThreadPool) { 1524 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 1525 //join the threads 1526 for(i=0; i<PetscMaxThreads; i++) { 1527 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1528 //do error checking 1529 } 1530 free(BarrPoint); 1531 free(PetscThreadPoint); 1532 } 1533 else { 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MainWait_True" 1540 void MainWait_True() { 1541 int ierr; 1542 while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) { 1543 ierr = pthread_cond_wait(&main_cond,&job_true.mutex); 1544 } 1545 ierr = pthread_mutex_unlock(&job_true.mutex); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MainJob_True" 1550 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) { 1551 int ierr; 1552 PetscErrorCode ijoberr = 0; 1553 if(PetscUseThreadPool) { 1554 MainWait(); 1555 job_true.pfunc = pFunc; 1556 job_true.pdata = data; 1557 job_true.pbarr = &BarrPoint[n]; 1558 job_true.iNumJobThreads = n; 1559 job_true.startJob = PETSC_TRUE; 1560 ierr = pthread_cond_broadcast(&job_true.cond); 1561 if(pFunc!=FuncFinish) { 1562 MainWait(); //why wait after? 
guarantees that job gets done 1563 } 1564 } 1565 else { 1566 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1567 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1568 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1569 free(apThread); 1570 } 1571 if(ithreaderr) { 1572 ijoberr = ithreaderr; 1573 } 1574 return ijoberr; 1575 } 1576 /**** ****/ 1577 1578 void* FuncFinish(void* arg) { 1579 PetscThreadGo = PETSC_FALSE; 1580 return(0); 1581 } 1582