1#!/usr/bin/env python 2import os,sys 3sys.path.append(os.path.join(os.environ['PETSC_DIR'], 'config')) 4sys.path.append(os.getcwd()) 5from builder2 import buildExample 6from benchmarkBatch import generateBatchScript 7 8class PETSc(object): 9 def __init__(self): 10 return 11 12 def dir(self): 13 '''Return the root directory for the PETSc tree (usually $PETSC_DIR)''' 14 # This should search for a valid PETSc 15 return os.environ['PETSC_DIR'] 16 17 def arch(self): 18 '''Return the PETSc build label (usually $PETSC_ARCH)''' 19 # This should be configurable 20 return os.environ['PETSC_ARCH'] 21 22 def mpiexec(self): 23 '''Return the path for the mpi launch executable''' 24 mpiexec = os.path.join(self.dir(), self.arch(), 'bin', 'mpiexec') 25 if not os.path.isfile(mpiexec): 26 return None 27 return mpiexec 28 29 def example(self, num): 30 '''Return the path to the executable for a given example number''' 31 return os.path.join(self.dir(), self.arch(), 'lib', 'ex'+str(num)+'-obj', 'ex'+str(num)) 32 33 def source(self, library, num): 34 '''Return the path to the sources for a given example number''' 35 d = os.path.join(self.dir(), 'src', library.lower(), 'examples', 'tutorials') 36 name = 'ex'+str(num) 37 sources = [] 38 for f in os.listdir(d): 39 if f == name+'.c': 40 sources.insert(0, f) 41 elif f.startswith(name) and f.endswith('.cu'): 42 sources.append(f) 43 return map(lambda f: os.path.join(d, f), sources) 44 45class PETScExample(object): 46 def __init__(self, library, num, **defaultOptions): 47 self.petsc = PETSc() 48 self.library = library 49 self.num = num 50 self.opts = defaultOptions 51 return 52 53 @staticmethod 54 def runShellCommand(command, cwd = None, log = True): 55 import subprocess 56 57 Popen = subprocess.Popen 58 PIPE = subprocess.PIPE 59 if log: print 'Executing: %s\n' % (command,) 60 pipe = Popen(command, cwd=cwd, stdin=None, stdout=PIPE, stderr=PIPE, bufsize=-1, shell=True, universal_newlines=True) 61 (out, err) = pipe.communicate() 62 ret = pipe.returncode 63 return (out, err, ret) 64 65 def optionsToString(self, **opts): 66 '''Convert a dictionary of options to a command line argument string''' 67 a = [] 68 for key,value in opts.iteritems(): 69 if value is None: 70 a.append('-'+key) 71 else: 72 a.append('-'+key+' '+str(value)) 73 return ' '.join(a) 74 75 def run(self, numProcs = 1, log = True, **opts): 76 if self.petsc.mpiexec() is None: 77 cmd = self.petsc.example(self.num) 78 else: 79 cmd = ' '.join([self.petsc.mpiexec(), '-n', str(numProcs), self.petsc.example(self.num)]) 80 cmd += ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts) 81 if 'batch' in opts and opts['batch']: 82 del opts['batch'] 83 filename = generateBatchScript(self.num, numProcs, 120, ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts)) 84 # Submit job 85 out, err, ret = self.runShellCommand('qsub -q gpu '+filename, log = log) 86 if ret: 87 print err 88 print out 89 else: 90 out, err, ret = self.runShellCommand(cmd, log = log) 91 if ret: 92 print err 93 print out 94 return out 95 96def processSummary(moduleName, defaultStage, eventNames, times, events): 97 '''Process the Python log summary into plot data''' 98 m = __import__(moduleName) 99 reload(m) 100 # Total Time 101 times.append(m.Time[0]) 102 # Particular events 103 for name in eventNames: 104 if name.find(':') >= 0: 105 stageName, name = name.split(':', 1) 106 stage = getattr(m, stageName) 107 else: 108 stage = getattr(m, defaultStage) 109 if name in stage.event: 110 if not name in events: 111 events[name] = [] 112 try: 113 events[name].append((stage.event[name].Time[0], stage.event[name].Flops[0]/(stage.event[name].Time[0] * 1e6))) 114 except ZeroDivisionError: 115 events[name].append((stage.event[name].Time[0], 0)) 116 return 117 118def plotTime(library, num, eventNames, sizes, times, events): 119 from pylab import legend, plot, show, title, xlabel, ylabel 120 import numpy as np 121 122 arches = sizes.keys() 123 data = [] 124 for arch in arches: 125 data.append(sizes[arch]) 126 data.append(times[arch]) 127 plot(*data) 128 title('Performance on '+library+' Example '+str(num)) 129 xlabel('Number of Dof') 130 ylabel('Time (s)') 131 legend(arches, 'upper left', shadow = True) 132 show() 133 return 134 135def plotEventTime(library, num, eventNames, sizes, times, events, filename = None): 136 from pylab import close, legend, plot, savefig, show, title, xlabel, ylabel 137 import numpy as np 138 139 close() 140 arches = sizes.keys() 141 bs = events[arches[0]].keys()[0] 142 data = [] 143 names = [] 144 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 145 for arch, style in zip(arches, ['-', ':']): 146 if event in events[arch][bs]: 147 names.append(arch+'-'+str(bs)+' '+event) 148 data.append(sizes[arch][bs]) 149 data.append(np.array(events[arch][bs][event])[:,0]) 150 data.append(color+style) 151 else: 152 print 'Could not find %s in %s-%d events' % (event, arch, bs) 153 print data 154 plot(*data) 155 title('Performance on '+library+' Example '+str(num)) 156 xlabel('Number of Dof') 157 ylabel('Time (s)') 158 legend(names, 'upper left', shadow = True) 159 if filename is None: 160 show() 161 else: 162 savefig(filename) 163 return 164 165def plotEventFlop(library, num, eventNames, sizes, times, events, filename = None): 166 from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel 167 import numpy as np 168 169 arches = sizes.keys() 170 bs = events[arches[0]].keys()[0] 171 data = [] 172 names = [] 173 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 174 for arch, style in zip(arches, ['-', ':']): 175 if event in events[arch][bs]: 176 names.append(arch+'-'+str(bs)+' '+event) 177 data.append(sizes[arch][bs]) 178 data.append(1e-3*np.array(events[arch][bs][event])[:,1]) 179 data.append(color+style) 180 else: 181 print 'Could not find %s in %s-%d events' % (event, arch, bs) 182 semilogy(*data) 183 title('Performance on '+library+' Example '+str(num)) 184 xlabel('Number of Dof') 185 ylabel('Computation Rate (GF/s)') 186 legend(names, 'upper left', shadow = True) 187 if filename is None: 188 show() 189 else: 190 savefig(filename) 191 return 192 193def plotEventScaling(library, num, eventNames, procs, events, filename = None): 194 from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel 195 import numpy as np 196 197 arches = procs.keys() 198 bs = events[arches[0]].keys()[0] 199 data = [] 200 names = [] 201 for arch, style in zip(arches, ['-', ':']): 202 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 203 if event in events[arch][bs]: 204 names.append(arch+'-'+str(bs)+' '+event) 205 data.append(procs[arch][bs]) 206 data.append(1e-3*np.array(events[arch][bs][event])[:,1]) 207 data.append(color+style) 208 else: 209 print 'Could not find %s in %s-%d events' % (event, arch, bs) 210 plot(*data) 211 title('Performance on '+library+' Example '+str(num)) 212 xlabel('Number of Processors') 213 ylabel('Computation Rate (GF/s)') 214 legend(names, 'upper left', shadow = True) 215 if filename is None: 216 show() 217 else: 218 savefig(filename) 219 return 220 221def plotSummaryLine(library, num, eventNames, sizes, times, events): 222 from pylab import legend, plot, show, title, xlabel, ylabel 223 import numpy as np 224 showTime = False 225 showEventTime = True 226 showEventFlops = True 227 arches = sizes.keys() 228 # Time 229 if showTime: 230 data = [] 231 for arch in arches: 232 data.append(sizes[arch]) 233 data.append(times[arch]) 234 plot(*data) 235 title('Performance on '+library+' Example '+str(num)) 236 xlabel('Number of Dof') 237 ylabel('Time (s)') 238 legend(arches, 'upper left', shadow = True) 239 show() 240 # Common event time 241 # We could make a stacked plot like Rio uses here 242 if showEventTime: 243 bs = events[arches[0]].keys()[0] 244 data = [] 245 names = [] 246 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 247 for arch, style in zip(arches, ['-', ':']): 248 if event in events[arch][bs]: 249 names.append(arch+'-'+str(bs)+' '+event) 250 data.append(sizes[arch][bs]) 251 data.append(np.array(events[arch][bs][event])[:,0]) 252 data.append(color+style) 253 else: 254 print 'Could not find %s in %s-%d events' % (event, arch, bs) 255 print data 256 plot(*data) 257 title('Performance on '+library+' Example '+str(num)) 258 xlabel('Number of Dof') 259 ylabel('Time (s)') 260 legend(names, 'upper left', shadow = True) 261 show() 262 # Common event flops 263 # We could make a stacked plot like Rio uses here 264 if showEventFlops: 265 bs = events[arches[0]].keys()[0] 266 data = [] 267 names = [] 268 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 269 for arch, style in zip(arches, ['-', ':']): 270 if event in events[arch][bs]: 271 names.append(arch+'-'+str(bs)+' '+event) 272 data.append(sizes[arch][bs]) 273 data.append(np.array(events[arch][bs][event])[:,1]) 274 data.append(color+style) 275 else: 276 print 'Could not find %s in %s-%d events' % (event, arch, bs) 277 plot(*data) 278 title('Performance on '+library+' Example '+str(num)) 279 xlabel('Number of Dof') 280 ylabel('Computation Rate (MF/s)') 281 legend(names, 'upper left', shadow = True) 282 show() 283 return 284 285def plotSummaryBar(library, num, eventNames, sizes, times, events): 286 import numpy as np 287 import matplotlib.pyplot as plt 288 289 eventColors = ['b', 'g', 'r', 'y'] 290 arches = sizes.keys() 291 names = [] 292 N = len(sizes[arches[0]]) 293 width = 0.2 294 ind = np.arange(N) - 0.25 295 bars = {} 296 for arch in arches: 297 bars[arch] = [] 298 bottom = np.zeros(N) 299 for event, color in zip(eventNames, eventColors): 300 names.append(arch+' '+event) 301 times = np.array(events[arch][event])[:,0] 302 bars[arch].append(plt.bar(ind, times, width, color=color, bottom=bottom)) 303 bottom += times 304 ind += 0.3 305 306 plt.xlabel('Number of Dof') 307 plt.ylabel('Time (s)') 308 plt.title('GPU vs. CPU Performance on '+library+' Example '+str(num)) 309 plt.xticks(np.arange(N), map(str, sizes[arches[0]])) 310 #plt.yticks(np.arange(0,81,10)) 311 #plt.legend( (p1[0], p2[0]), ('Men', 'Women') ) 312 plt.legend([bar[0] for bar in bars[arches[0]]], eventNames, 'upper right', shadow = True) 313 314 plt.show() 315 return 316 317def getDMComplexSize(dim, out): 318 '''Retrieves the number of cells from ''' 319 size = 0 320 for line in out.split('\n'): 321 if line.strip().startswith(str(dim)+'-cells: '): 322 size = int(line.strip()[9:]) 323 break 324 return size 325 326def run_DMDA(ex, name, opts, args, sizes, times, events, log=True): 327 for n in map(int, args.size): 328 ex.run(log=log, da_grid_x=n, da_grid_y=n, **opts) 329 sizes[name].append(n*n * args.comp) 330 processSummary('summary', args.stage, args.events, times[name], events[name]) 331 return 332 333def run_DMComplex(ex, name, opts, args, sizes, times, events, log=True): 334 # This should eventually be replaced by a direct FFC/Ignition interface 335 if args.operator == 'laplacian': 336 numComp = 1 337 elif args.operator == 'elasticity': 338 numComp = args.dim 339 else: 340 raise RuntimeError('Unknown operator: %s' % args.operator) 341 342 for numBlock in [2**i for i in map(int, args.blockExp)]: 343 opts['gpu_blocks'] = numBlock 344 # Generate new block size 345 cmd = os.environ.get('PETSC_DIR', '.') 346 cmd += '/bin/pythonscripts/PetscGenerateFEMQuadrature.py %d %d %d %d %s %s.h' % (args.dim, args.order, numComp, numBlock, args.operator, os.path.splitext(source[0])[0]) 347 print(cmd) 348 ret = os.system('python '+cmd) 349 args.files = ['['+','.join(source)+']'] 350 buildExample(args) 351 sizes[name][numBlock] = [] 352 times[name][numBlock] = [] 353 events[name][numBlock] = {} 354 for r in map(float, args.refine): 355 out = ex.run(log=log, refinement_limit=r, **opts) 356 sizes[name][numBlock].append(getDMComplexSize(args.dim, out)) 357 processSummary('summary', args.stage, args.events, times[name][numBlock], events[name][numBlock]) 358 return 359 360def outputData(sizes, times, events, name = 'output.py'): 361 if os.path.exists(name): 362 base, ext = os.path.splitext(name) 363 num = 1 364 while os.path.exists(base+str(num)+ext): 365 num += 1 366 name = base+str(num)+ext 367 with file(name, 'w') as f: 368 f.write('#PETSC_ARCH='+os.environ['PETSC_ARCH']+' '+' '.join(sys.argv)+'\n') 369 f.write('sizes = '+repr(sizes)+'\n') 370 f.write('times = '+repr(times)+'\n') 371 f.write('events = '+repr(events)+'\n') 372 return 373 374if __name__ == '__main__': 375 import argparse 376 377 parser = argparse.ArgumentParser(description = 'PETSc Benchmarking', 378 epilog = 'This script runs src/<library>/examples/tutorials/ex<num>, For more information, visit http://www.mcs.anl.gov/petsc', 379 formatter_class = argparse.ArgumentDefaultsHelpFormatter) 380 parser.add_argument('--library', default='SNES', help='The PETSc library used in this example') 381 parser.add_argument('--num', type = int, default='5', help='The example number') 382 parser.add_argument('--module', default='summary', help='The module for timing output') 383 parser.add_argument('--stage', default='Main_Stage', help='The default logging stage') 384 parser.add_argument('--events', nargs='+', help='Events to process') 385 parser.add_argument('--batch', action='store_true', default=False, help='Generate batch files for the runs instead') 386 parser.add_argument('--daemon', action='store_true', default=False, help='Run as a daemon') 387 subparsers = parser.add_subparsers(help='DM types') 388 389 parser_dmda = subparsers.add_parser('DMDA', help='Use a DMDA for the problem geometry') 390 parser_dmda.add_argument('--size', nargs='+', default=['10'], help='Grid size (implementation dependent)') 391 parser_dmda.add_argument('--comp', type = int, default='1', help='Number of field components') 392 parser_dmda.add_argument('runs', nargs='*', help='Run descriptions: <name>=<args>') 393 394 parser_dmmesh = subparsers.add_parser('DMComplex', help='Use a DMComplex for the problem geometry') 395 parser_dmmesh.add_argument('--dim', type = int, default='2', help='Spatial dimension') 396 parser_dmmesh.add_argument('--refine', nargs='+', default=['0.0'], help='List of refinement limits') 397 parser_dmmesh.add_argument('--order', type = int, default='1', help='Order of the finite element') 398 parser_dmmesh.add_argument('--operator', default='laplacian', help='The operator name') 399 parser_dmmesh.add_argument('--blockExp', nargs='+', default=range(0, 5), help='List of block exponents j, block size is 2^j') 400 parser_dmmesh.add_argument('runs', nargs='*', help='Run descriptions: <name>=<args>') 401 402 args = parser.parse_args() 403 print(args) 404 if hasattr(args, 'comp'): 405 args.dmType = 'DMDA' 406 else: 407 args.dmType = 'DMComplex' 408 409 ex = PETScExample(args.library, args.num, log_summary='summary.dat', log_summary_python = None if args.batch else args.module+'.py', preload='off') 410 source = ex.petsc.source(args.library, args.num) 411 sizes = {} 412 times = {} 413 events = {} 414 log = not args.daemon 415 416 if args.daemon: 417 import daemon 418 print 'Starting daemon' 419 daemon.createDaemon('.') 420 421 for run in args.runs: 422 name, stropts = run.split('=', 1) 423 opts = dict([t if len(t) == 2 else (t[0], None) for t in [arg.split('=', 1) for arg in stropts.split(' ')]]) 424 if args.dmType == 'DMDA': 425 sizes[name] = [] 426 times[name] = [] 427 events[name] = {} 428 run_DMDA(ex, name, opts, args, sizes, times, events, log=log) 429 elif args.dmType == 'DMComplex': 430 sizes[name] = {} 431 times[name] = {} 432 events[name] = {} 433 run_DMComplex(ex, name, opts, args, sizes, times, events, log=log) 434 outputData(sizes, times, events) 435 if not args.batch and log: plotSummaryLine(args.library, args.num, args.events, sizes, times, events) 436# Benchmark for ex50 437# ./src/benchmarks/benchmarkExample.py --events VecMDot VecMAXPY KSPGMRESOrthog MatMult VecCUSPCopyTo VecCUSPCopyFrom MatCUSPCopyTo --num 50 DMDA --size 10 20 50 100 --comp 4 CPU='pc_type=none mat_no_inode dm_vec_type=seq dm_mat_type=seqaij' GPU='pc_type=none mat_no_inode dm_vec_type=seqcusp dm_mat_type=seqaijcusp cusp_synchronize' 438# Benchmark for ex52 439# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dm_view show_residual=0 compute_function batch' GPU='dm_view show_residual=0 compute_function batch gpu gpu_batches=8' 440# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 --operator=elasticity CPU='dm_view op_type=elasticity show_residual=0 compute_function batch' GPU='dm_view op_type=elasticity show_residual=0 compute_function batch gpu gpu_batches=8' 441# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --dim=3 --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dim=3 dm_view show_residual=0 compute_function batch' GPU='dim=3 dm_view show_residual=0 compute_function batch gpu gpu_batches=8' 442