1#!/usr/bin/env python 2import os,sys 3sys.path.append(os.path.join(os.environ['PETSC_DIR'], 'config')) 4sys.path.append(os.getcwd()) 5from builder2 import buildExample 6from benchmarkBatch import generateBatchScript 7 8class PETSc(object): 9 def __init__(self): 10 return 11 12 def dir(self): 13 '''Return the root directory for the PETSc tree (usually $PETSC_DIR)''' 14 # This should search for a valid PETSc 15 return os.environ['PETSC_DIR'] 16 17 def arch(self): 18 '''Return the PETSc build label (usually $PETSC_ARCH)''' 19 # This should be configurable 20 return os.environ['PETSC_ARCH'] 21 22 def mpiexec(self): 23 '''Return the path for the mpi launch executable''' 24 mpiexec = os.path.join(self.dir(), self.arch(), 'bin', 'mpiexec') 25 if not os.path.isfile(mpiexec): 26 return None 27 return mpiexec 28 29 def example(self, library, num): 30 '''Return the path to the executable for a given example number''' 31 return os.path.join(self.dir(), self.arch(), 'lib', library.lower(), 'ex'+str(num)) 32 33 def source(self, library, num, filenametail): 34 '''Return the path to the sources for a given example number''' 35 d = os.path.join(self.dir(), 'src', library.lower(), 'examples', 'tutorials') 36 name = 'ex'+str(num) 37 sources = [] 38 for f in os.listdir(d): 39 if f == name+'.c': 40 sources.insert(0, f) 41 elif f.startswith(name) and f.endswith(filenametail): 42 sources.append(f) 43 return map(lambda f: os.path.join(d, f), sources) 44 45class PETScExample(object): 46 def __init__(self, library, num, **defaultOptions): 47 self.petsc = PETSc() 48 self.library = library 49 self.num = num 50 self.opts = defaultOptions 51 return 52 53 @staticmethod 54 def runShellCommand(command, cwd = None, log = True): 55 import subprocess 56 57 Popen = subprocess.Popen 58 PIPE = subprocess.PIPE 59 if log: print 'Executing: %s\n' % (command,) 60 pipe = Popen(command, cwd=cwd, stdin=None, stdout=PIPE, stderr=PIPE, bufsize=-1, shell=True, universal_newlines=True) 61 (out, err) = pipe.communicate() 62 ret = pipe.returncode 63 return (out, err, ret) 64 65 def optionsToString(self, **opts): 66 '''Convert a dictionary of options to a command line argument string''' 67 a = [] 68 for key,value in opts.iteritems(): 69 if value is None: 70 a.append('-'+key) 71 else: 72 a.append('-'+key+' '+str(value)) 73 return ' '.join(a) 74 75 def build(self, log = True): 76 sdir = os.path.join(self.petsc.dir(), 'src', self.library.lower(), 'examples', 'tutorials') 77 odir = os.getcwd() 78 os.chdir(sdir) 79 cmd = 'make ex'+str(self.num) 80 out, err, ret = self.runShellCommand(cmd, cwd = sdir, log = log) 81 if err: 82 raise RuntimeError('Unable to build example:\n'+err+out) 83 os.chdir(odir) 84 bdir = os.path.dirname(self.petsc.example(self.library, self.num)) 85 try: 86 os.makedirs(bdir) 87 except OSError: 88 if not os.path.isdir(bdir): 89 raise 90 cmd = 'mv '+os.path.join(sdir, 'ex'+str(self.num))+' '+self.petsc.example(self.library, self.num) 91 out, err, ret = self.runShellCommand(cmd, log = log) 92 if ret: 93 print err 94 print out 95 return 96 97 def run(self, numProcs = 1, log = True, **opts): 98 cmd = '' 99 if self.petsc.mpiexec() is not None: 100 cmd += self.petsc.mpiexec() + ' ' 101 numProcs = os.environ.get('NUM_RANKS', numProcs) 102 cmd += ' -n ' + str(numProcs) + ' ' 103 if os.environ.has_key('PE_HOSTFILE'): 104 cmd += ' -hostfile hostfile ' 105 cmd += ' '.join([self.petsc.example(self.library, self.num), self.optionsToString(**self.opts), self.optionsToString(**opts)]) 106 if 'batch' in opts and opts['batch']: 107 del opts['batch'] 108 filename = generateBatchScript(self.num, numProcs, 120, ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts)) 109 # Submit job 110 out, err, ret = self.runShellCommand('qsub -q gpu '+filename, log = log) 111 if ret: 112 print err 113 print out 114 else: 115 out, err, ret = self.runShellCommand(cmd, log = log) 116 if ret: 117 print err 118 print out 119 return out 120 121def processSummary(moduleName, defaultStage, eventNames, times, events): 122 '''Process the Python log summary into plot data''' 123 m = __import__(moduleName) 124 reload(m) 125 # Total Time 126 times.append(m.Time[0]) 127 # Particular events 128 for name in eventNames: 129 if name.find(':') >= 0: 130 stageName, name = name.split(':', 1) 131 stage = getattr(m, stageName) 132 else: 133 stage = getattr(m, defaultStage) 134 if name in stage.event: 135 if not name in events: 136 events[name] = [] 137 try: 138 events[name].append((max(stage.event[name].Time), sum(stage.event[name].Flops)/(max(stage.event[name].Time) * 1e6))) 139 except ZeroDivisionError: 140 events[name].append((max(stage.event[name].Time), 0)) 141 return 142 143def plotTime(library, num, eventNames, sizes, times, events): 144 from pylab import legend, plot, show, title, xlabel, ylabel 145 import numpy as np 146 147 arches = sizes.keys() 148 data = [] 149 for arch in arches: 150 data.append(sizes[arch]) 151 data.append(times[arch]) 152 plot(*data) 153 title('Performance on '+library+' Example '+str(num)) 154 xlabel('Number of Dof') 155 ylabel('Time (s)') 156 legend(arches, 'upper left', shadow = True) 157 show() 158 return 159 160def plotEventTime(library, num, eventNames, sizes, times, events, filename = None): 161 from pylab import close, legend, plot, savefig, show, title, xlabel, ylabel 162 import numpy as np 163 164 close() 165 arches = sizes.keys() 166 bs = events[arches[0]].keys()[0] 167 data = [] 168 names = [] 169 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 170 for arch, style in zip(arches, ['-', ':']): 171 if event in events[arch][bs]: 172 names.append(arch+'-'+str(bs)+' '+event) 173 data.append(sizes[arch][bs]) 174 data.append(np.array(events[arch][bs][event])[:,0]) 175 data.append(color+style) 176 else: 177 print 'Could not find %s in %s-%d events' % (event, arch, bs) 178 print data 179 plot(*data) 180 title('Performance on '+library+' Example '+str(num)) 181 xlabel('Number of Dof') 182 ylabel('Time (s)') 183 legend(names, 'upper left', shadow = True) 184 if filename is None: 185 show() 186 else: 187 savefig(filename) 188 return 189 190def plotEventFlop(library, num, eventNames, sizes, times, events, filename = None): 191 from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel 192 import numpy as np 193 194 arches = sizes.keys() 195 bs = events[arches[0]].keys()[0] 196 data = [] 197 names = [] 198 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 199 for arch, style in zip(arches, ['-', ':']): 200 if event in events[arch][bs]: 201 names.append(arch+'-'+str(bs)+' '+event) 202 data.append(sizes[arch][bs]) 203 data.append(1e-3*np.array(events[arch][bs][event])[:,1]) 204 data.append(color+style) 205 else: 206 print 'Could not find %s in %s-%d events' % (event, arch, bs) 207 semilogy(*data) 208 title('Performance on '+library+' Example '+str(num)) 209 xlabel('Number of Dof') 210 ylabel('Computation Rate (GF/s)') 211 legend(names, 'upper left', shadow = True) 212 if filename is None: 213 show() 214 else: 215 savefig(filename) 216 return 217 218def plotEventScaling(library, num, eventNames, procs, events, filename = None): 219 from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel 220 import numpy as np 221 222 arches = procs.keys() 223 bs = events[arches[0]].keys()[0] 224 data = [] 225 names = [] 226 for arch, style in zip(arches, ['-', ':']): 227 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 228 if event in events[arch][bs]: 229 names.append(arch+'-'+str(bs)+' '+event) 230 data.append(procs[arch][bs]) 231 data.append(1e-3*np.array(events[arch][bs][event])[:,1]) 232 data.append(color+style) 233 else: 234 print 'Could not find %s in %s-%d events' % (event, arch, bs) 235 plot(*data) 236 title('Performance on '+library+' Example '+str(num)) 237 xlabel('Number of Processors') 238 ylabel('Computation Rate (GF/s)') 239 legend(names, 'upper left', shadow = True) 240 if filename is None: 241 show() 242 else: 243 savefig(filename) 244 return 245 246def plotSummaryLine(library, num, eventNames, sizes, times, events): 247 from pylab import legend, plot, show, title, xlabel, ylabel 248 import numpy as np 249 showTime = False 250 showEventTime = True 251 showEventFlops = True 252 arches = sizes.keys() 253 # Time 254 if showTime: 255 data = [] 256 for arch in arches: 257 data.append(sizes[arch]) 258 data.append(times[arch]) 259 plot(*data) 260 title('Performance on '+library+' Example '+str(num)) 261 xlabel('Number of Dof') 262 ylabel('Time (s)') 263 legend(arches, 'upper left', shadow = True) 264 show() 265 # Common event time 266 # We could make a stacked plot like Rio uses here 267 if showEventTime: 268 bs = events[arches[0]].keys()[0] 269 data = [] 270 names = [] 271 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 272 for arch, style in zip(arches, ['-', ':']): 273 if event in events[arch][bs]: 274 names.append(arch+'-'+str(bs)+' '+event) 275 data.append(sizes[arch][bs]) 276 data.append(np.array(events[arch][bs][event])[:,0]) 277 data.append(color+style) 278 else: 279 print 'Could not find %s in %s-%d events' % (event, arch, bs) 280 print data 281 plot(*data) 282 title('Performance on '+library+' Example '+str(num)) 283 xlabel('Number of Dof') 284 ylabel('Time (s)') 285 legend(names, 'upper left', shadow = True) 286 show() 287 # Common event flops 288 # We could make a stacked plot like Rio uses here 289 if showEventFlops: 290 bs = events[arches[0]].keys()[0] 291 data = [] 292 names = [] 293 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 294 for arch, style in zip(arches, ['-', ':']): 295 if event in events[arch][bs]: 296 names.append(arch+'-'+str(bs)+' '+event) 297 data.append(sizes[arch][bs]) 298 data.append(np.array(events[arch][bs][event])[:,1]) 299 data.append(color+style) 300 else: 301 print 'Could not find %s in %s-%d events' % (event, arch, bs) 302 plot(*data) 303 title('Performance on '+library+' Example '+str(num)) 304 xlabel('Number of Dof') 305 ylabel('Computation Rate (MF/s)') 306 legend(names, 'upper left', shadow = True) 307 show() 308 return 309 310def plotSummaryBar(library, num, eventNames, sizes, times, events): 311 import numpy as np 312 import matplotlib.pyplot as plt 313 314 eventColors = ['b', 'g', 'r', 'y'] 315 arches = sizes.keys() 316 names = [] 317 N = len(sizes[arches[0]]) 318 width = 0.2 319 ind = np.arange(N) - 0.25 320 bars = {} 321 for arch in arches: 322 bars[arch] = [] 323 bottom = np.zeros(N) 324 for event, color in zip(eventNames, eventColors): 325 names.append(arch+' '+event) 326 times = np.array(events[arch][event])[:,0] 327 bars[arch].append(plt.bar(ind, times, width, color=color, bottom=bottom)) 328 bottom += times 329 ind += 0.3 330 331 plt.xlabel('Number of Dof') 332 plt.ylabel('Time (s)') 333 plt.title('GPU vs. CPU Performance on '+library+' Example '+str(num)) 334 plt.xticks(np.arange(N), map(str, sizes[arches[0]])) 335 #plt.yticks(np.arange(0,81,10)) 336 #plt.legend( (p1[0], p2[0]), ('Men', 'Women') ) 337 plt.legend([bar[0] for bar in bars[arches[0]]], eventNames, 'upper right', shadow = True) 338 339 plt.show() 340 return 341 342def getDMComplexSize(dim, out): 343 '''Retrieves the number of cells from -dm_view output''' 344 size = 0 345 for line in out.split('\n'): 346 if line.strip().startswith(str(dim)+'-cells: '): 347 sizes = line.strip()[9:].split() 348 size = sum(map(int, sizes)) 349 break 350 return size 351 352def run_DMDA(ex, name, opts, args, sizes, times, events, log=True): 353 for n in map(int, args.size): 354 ex.run(log=log, da_grid_x=n, da_grid_y=n, **opts) 355 sizes[name].append(n*n * args.comp) 356 processSummary('summary', args.stage, args.events, times[name], events[name]) 357 return 358 359def run_DMComplex(ex, name, opts, args, sizes, times, events, log=True): 360 # This should eventually be replaced by a direct FFC/Ignition interface 361 if args.operator == 'laplacian': 362 numComp = 1 363 elif args.operator == 'elasticity': 364 numComp = args.dim 365 else: 366 raise RuntimeError('Unknown operator: %s' % args.operator) 367 368 for numBlock in [2**i for i in map(int, args.blockExp)]: 369 opts['gpu_blocks'] = numBlock 370 args.files = ['['+','.join(source)+']'] 371 buildExample(args) 372 sizes[name][numBlock] = [] 373 times[name][numBlock] = [] 374 events[name][numBlock] = {} 375 for r in map(float, args.refine): 376 out = ex.run(log=log, refinement_limit=r, **opts) 377 sizes[name][numBlock].append(getDMComplexSize(args.dim, out)) 378 processSummary('summary', args.stage, args.events, times[name][numBlock], events[name][numBlock]) 379 return 380 381def outputData(sizes, times, events, name = 'output.py'): 382 if os.path.exists(name): 383 base, ext = os.path.splitext(name) 384 num = 1 385 while os.path.exists(base+str(num)+ext): 386 num += 1 387 name = base+str(num)+ext 388 with file(name, 'w') as f: 389 f.write('#PETSC_ARCH='+os.environ['PETSC_ARCH']+' '+' '.join(sys.argv)+'\n') 390 f.write('sizes = '+repr(sizes)+'\n') 391 f.write('times = '+repr(times)+'\n') 392 f.write('events = '+repr(events)+'\n') 393 return 394 395if __name__ == '__main__': 396 import argparse 397 398 parser = argparse.ArgumentParser(description = 'PETSc Benchmarking', 399 epilog = 'This script runs src/<library>/examples/tutorials/ex<num>, For more information, visit http://www.mcs.anl.gov/petsc', 400 formatter_class = argparse.ArgumentDefaultsHelpFormatter) 401 parser.add_argument('--library', default='SNES', help='The PETSc library used in this example') 402 parser.add_argument('--num', type = int, default='5', help='The example number') 403 parser.add_argument('--module', default='summary', help='The module for timing output') 404 parser.add_argument('--stage', default='Main_Stage', help='The default logging stage') 405 parser.add_argument('--events', nargs='+', help='Events to process') 406 parser.add_argument('--batch', action='store_true', default=False, help='Generate batch files for the runs instead') 407 parser.add_argument('--daemon', action='store_true', default=False, help='Run as a daemon') 408 parser.add_argument('--gpulang', default='OpenCL', help='GPU Language to use: Either CUDA or OpenCL (default)') 409 subparsers = parser.add_subparsers(help='DM types') 410 411 parser_dmda = subparsers.add_parser('DMDA', help='Use a DMDA for the problem geometry') 412 parser_dmda.add_argument('--size', nargs='+', default=['10'], help='Grid size (implementation dependent)') 413 parser_dmda.add_argument('--comp', type = int, default='1', help='Number of field components') 414 parser_dmda.add_argument('runs', nargs='*', help='Run descriptions: <name>=<args>') 415 416 parser_dmmesh = subparsers.add_parser('DMComplex', help='Use a DMComplex for the problem geometry') 417 parser_dmmesh.add_argument('--dim', type = int, default='2', help='Spatial dimension') 418 parser_dmmesh.add_argument('--refine', nargs='+', default=['0.0'], help='List of refinement limits') 419 parser_dmmesh.add_argument('--order', type = int, default='1', help='Order of the finite element') 420 parser_dmmesh.add_argument('--operator', default='laplacian', help='The operator name') 421 parser_dmmesh.add_argument('--blockExp', nargs='+', default=range(0, 5), help='List of block exponents j, block size is 2^j') 422 parser_dmmesh.add_argument('runs', nargs='*', help='Run descriptions: <name>=<args>') 423 424 args = parser.parse_args() 425 print(args) 426 if hasattr(args, 'comp'): 427 args.dmType = 'DMDA' 428 else: 429 args.dmType = 'DMComplex' 430 431 ex = PETScExample(args.library, args.num, log_summary='summary.dat', log_summary_python = None if args.batch else args.module+'.py', preload='off') 432 if args.gpulang == 'CUDA': 433 source = ex.petsc.source(args.library, args.num, '.cu') 434 else: 435 source = ex.petsc.source(args.library, args.num, 'OpenCL.c') # Using the convention of OpenCL code residing in source files ending in 'OpenCL.c' (at least for snes/ex52) 436 sizes = {} 437 times = {} 438 events = {} 439 log = not args.daemon 440 441 if args.daemon: 442 import daemon 443 print 'Starting daemon' 444 daemon.createDaemon('.') 445 446 for run in args.runs: 447 name, stropts = run.split('=', 1) 448 opts = dict([t if len(t) == 2 else (t[0], None) for t in [arg.split('=', 1) for arg in stropts.split(' ')]]) 449 if args.dmType == 'DMDA': 450 sizes[name] = [] 451 times[name] = [] 452 events[name] = {} 453 run_DMDA(ex, name, opts, args, sizes, times, events, log=log) 454 elif args.dmType == 'DMComplex': 455 sizes[name] = {} 456 times[name] = {} 457 events[name] = {} 458 run_DMComplex(ex, name, opts, args, sizes, times, events, log=log) 459 outputData(sizes, times, events) 460 if not args.batch and log: plotSummaryLine(args.library, args.num, args.events, sizes, times, events) 461# Benchmark for ex50 462# ./src/benchmarks/benchmarkExample.py --events VecMDot VecMAXPY KSPGMRESOrthog MatMult VecCUSPCopyTo VecCUSPCopyFrom MatCUSPCopyTo --num 50 DMDA --size 10 20 50 100 --comp 4 CPU='pc_type=none mat_no_inode dm_vec_type=seq dm_mat_type=seqaij' GPU='pc_type=none mat_no_inode dm_vec_type=seqcusp dm_mat_type=seqaijcusp cusp_synchronize' 463# Benchmark for ex52 464# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dm_view show_residual=0 compute_function batch' GPU='dm_view show_residual=0 compute_function batch gpu gpu_batches=8' 465# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 --operator=elasticity CPU='dm_view op_type=elasticity show_residual=0 compute_function batch' GPU='dm_view op_type=elasticity show_residual=0 compute_function batch gpu gpu_batches=8' 466# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --dim=3 --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dim=3 dm_view show_residual=0 compute_function batch' GPU='dim=3 dm_view show_residual=0 compute_function batch gpu gpu_batches=8' 467