1#!/usr/bin/env python 2import os,sys 3sys.path.append(os.path.join(os.environ['PETSC_DIR'], 'config')) 4from builder2 import buildExample 5from benchmarkBatch import generateBatchScript 6 7class PETSc(object): 8 def __init__(self): 9 return 10 11 def dir(self): 12 '''Return the root directory for the PETSc tree (usually $PETSC_DIR)''' 13 # This should search for a valid PETSc 14 return os.environ['PETSC_DIR'] 15 16 def arch(self): 17 '''Return the PETSc build label (usually $PETSC_ARCH)''' 18 # This should be configurable 19 return os.environ['PETSC_ARCH'] 20 21 def mpiexec(self): 22 '''Return the path for the mpi launch executable''' 23 mpiexec = os.path.join(self.dir(), self.arch(), 'bin', 'mpiexec') 24 if not os.path.isfile(mpiexec): 25 return None 26 return mpiexec 27 28 def example(self, num): 29 '''Return the path to the executable for a given example number''' 30 return os.path.join(self.dir(), self.arch(), 'lib', 'ex'+str(num)+'-obj', 'ex'+str(num)) 31 32 def source(self, library, num): 33 '''Return the path to the sources for a given example number''' 34 d = os.path.join(self.dir(), 'src', library.lower(), 'examples', 'tutorials') 35 name = 'ex'+str(num) 36 sources = [] 37 for f in os.listdir(d): 38 if f == name+'.c': 39 sources.insert(0, f) 40 elif f.startswith(name) and f.endswith('.cu'): 41 sources.append(f) 42 return map(lambda f: os.path.join(d, f), sources) 43 44class PETScExample(object): 45 def __init__(self, library, num, **defaultOptions): 46 self.petsc = PETSc() 47 self.library = library 48 self.num = num 49 self.opts = defaultOptions 50 return 51 52 @staticmethod 53 def runShellCommand(command, cwd = None, log = True): 54 import subprocess 55 56 Popen = subprocess.Popen 57 PIPE = subprocess.PIPE 58 if log: print 'Executing: %s\n' % (command,) 59 pipe = Popen(command, cwd=cwd, stdin=None, stdout=PIPE, stderr=PIPE, bufsize=-1, shell=True, universal_newlines=True) 60 (out, err) = pipe.communicate() 61 ret = pipe.returncode 62 return (out, err, ret) 63 64 def optionsToString(self, **opts): 65 '''Convert a dictionary of options to a command line argument string''' 66 a = [] 67 for key,value in opts.iteritems(): 68 if value is None: 69 a.append('-'+key) 70 else: 71 a.append('-'+key+' '+str(value)) 72 return ' '.join(a) 73 74 def run(self, numProcs = 1, log = True, **opts): 75 if self.petsc.mpiexec() is None: 76 cmd = self.petsc.example(self.num) 77 else: 78 cmd = ' '.join([self.petsc.mpiexec(), '-n', str(numProcs), self.petsc.example(self.num)]) 79 cmd += ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts) 80 if 'batch' in opts and opts['batch']: 81 del opts['batch'] 82 filename = generateBatchScript(self.num, numProcs, 120, ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts)) 83 # Submit job 84 out, err, ret = self.runShellCommand('qsub -q gpu '+filename, log = log) 85 if ret: 86 print err 87 print out 88 else: 89 out, err, ret = self.runShellCommand(cmd, log = log) 90 if ret: 91 print err 92 print out 93 return out 94 95def processSummary(moduleName, defaultStage, eventNames, times, events): 96 '''Process the Python log summary into plot data''' 97 m = __import__(moduleName) 98 reload(m) 99 # Total Time 100 times.append(m.Time[0]) 101 # Particular events 102 for name in eventNames: 103 if name.find(':') >= 0: 104 stageName, name = name.split(':', 1) 105 stage = getattr(m, stageName) 106 else: 107 stage = getattr(m, defaultStage) 108 if name in stage.event: 109 if not name in events: 110 events[name] = [] 111 try: 112 events[name].append((stage.event[name].Time[0], stage.event[name].Flops[0]/(stage.event[name].Time[0] * 1e6))) 113 except ZeroDivisionError: 114 events[name].append((stage.event[name].Time[0], 0)) 115 return 116 117def plotTime(library, num, eventNames, sizes, times, events): 118 from pylab import legend, plot, show, title, xlabel, ylabel 119 import numpy as np 120 121 arches = sizes.keys() 122 data = [] 123 for arch in arches: 124 data.append(sizes[arch]) 125 data.append(times[arch]) 126 plot(*data) 127 title('Performance on '+library+' Example '+str(num)) 128 xlabel('Number of Dof') 129 ylabel('Time (s)') 130 legend(arches, 'upper left', shadow = True) 131 show() 132 return 133 134def plotEventTime(library, num, eventNames, sizes, times, events, filename = None): 135 from pylab import close, legend, plot, savefig, show, title, xlabel, ylabel 136 import numpy as np 137 138 close() 139 arches = sizes.keys() 140 bs = events[arches[0]].keys()[0] 141 data = [] 142 names = [] 143 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 144 for arch, style in zip(arches, ['-', ':']): 145 if event in events[arch][bs]: 146 names.append(arch+'-'+str(bs)+' '+event) 147 data.append(sizes[arch][bs]) 148 data.append(np.array(events[arch][bs][event])[:,0]) 149 data.append(color+style) 150 else: 151 print 'Could not find %s in %s-%d events' % (event, arch, bs) 152 print data 153 plot(*data) 154 title('Performance on '+library+' Example '+str(num)) 155 xlabel('Number of Dof') 156 ylabel('Time (s)') 157 legend(names, 'upper left', shadow = True) 158 if filename is None: 159 show() 160 else: 161 savefig(filename) 162 return 163 164def plotEventFlop(library, num, eventNames, sizes, times, events, filename = None): 165 from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel 166 import numpy as np 167 168 arches = sizes.keys() 169 bs = events[arches[0]].keys()[0] 170 data = [] 171 names = [] 172 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 173 for arch, style in zip(arches, ['-', ':']): 174 if event in events[arch][bs]: 175 names.append(arch+'-'+str(bs)+' '+event) 176 data.append(sizes[arch][bs]) 177 data.append(1e-3*np.array(events[arch][bs][event])[:,1]) 178 data.append(color+style) 179 else: 180 print 'Could not find %s in %s-%d events' % (event, arch, bs) 181 semilogy(*data) 182 title('Performance on '+library+' Example '+str(num)) 183 xlabel('Number of Dof') 184 ylabel('Computation Rate (GF/s)') 185 legend(names, 'upper left', shadow = True) 186 if filename is None: 187 show() 188 else: 189 savefig(filename) 190 return 191 192def plotEventScaling(library, num, eventNames, procs, events, filename = None): 193 from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel 194 import numpy as np 195 196 arches = procs.keys() 197 bs = events[arches[0]].keys()[0] 198 data = [] 199 names = [] 200 for arch, style in zip(arches, ['-', ':']): 201 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 202 if event in events[arch][bs]: 203 names.append(arch+'-'+str(bs)+' '+event) 204 data.append(procs[arch][bs]) 205 data.append(1e-3*np.array(events[arch][bs][event])[:,1]) 206 data.append(color+style) 207 else: 208 print 'Could not find %s in %s-%d events' % (event, arch, bs) 209 plot(*data) 210 title('Performance on '+library+' Example '+str(num)) 211 xlabel('Number of Processors') 212 ylabel('Computation Rate (GF/s)') 213 legend(names, 'upper left', shadow = True) 214 if filename is None: 215 show() 216 else: 217 savefig(filename) 218 return 219 220def plotSummaryLine(library, num, eventNames, sizes, times, events): 221 from pylab import legend, plot, show, title, xlabel, ylabel 222 import numpy as np 223 showTime = False 224 showEventTime = True 225 showEventFlops = True 226 arches = sizes.keys() 227 # Time 228 if showTime: 229 data = [] 230 for arch in arches: 231 data.append(sizes[arch]) 232 data.append(times[arch]) 233 plot(*data) 234 title('Performance on '+library+' Example '+str(num)) 235 xlabel('Number of Dof') 236 ylabel('Time (s)') 237 legend(arches, 'upper left', shadow = True) 238 show() 239 # Common event time 240 # We could make a stacked plot like Rio uses here 241 if showEventTime: 242 bs = events[arches[0]].keys()[0] 243 data = [] 244 names = [] 245 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 246 for arch, style in zip(arches, ['-', ':']): 247 if event in events[arch][bs]: 248 names.append(arch+'-'+str(bs)+' '+event) 249 data.append(sizes[arch][bs]) 250 data.append(np.array(events[arch][bs][event])[:,0]) 251 data.append(color+style) 252 else: 253 print 'Could not find %s in %s-%d events' % (event, arch, bs) 254 print data 255 plot(*data) 256 title('Performance on '+library+' Example '+str(num)) 257 xlabel('Number of Dof') 258 ylabel('Time (s)') 259 legend(names, 'upper left', shadow = True) 260 show() 261 # Common event flops 262 # We could make a stacked plot like Rio uses here 263 if showEventFlops: 264 bs = events[arches[0]].keys()[0] 265 data = [] 266 names = [] 267 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 268 for arch, style in zip(arches, ['-', ':']): 269 if event in events[arch][bs]: 270 names.append(arch+'-'+str(bs)+' '+event) 271 data.append(sizes[arch][bs]) 272 data.append(np.array(events[arch][bs][event])[:,1]) 273 data.append(color+style) 274 else: 275 print 'Could not find %s in %s-%d events' % (event, arch, bs) 276 plot(*data) 277 title('Performance on '+library+' Example '+str(num)) 278 xlabel('Number of Dof') 279 ylabel('Computation Rate (MF/s)') 280 legend(names, 'upper left', shadow = True) 281 show() 282 return 283 284def plotSummaryBar(library, num, eventNames, sizes, times, events): 285 import numpy as np 286 import matplotlib.pyplot as plt 287 288 eventColors = ['b', 'g', 'r', 'y'] 289 arches = sizes.keys() 290 names = [] 291 N = len(sizes[arches[0]]) 292 width = 0.2 293 ind = np.arange(N) - 0.25 294 bars = {} 295 for arch in arches: 296 bars[arch] = [] 297 bottom = np.zeros(N) 298 for event, color in zip(eventNames, eventColors): 299 names.append(arch+' '+event) 300 times = np.array(events[arch][event])[:,0] 301 bars[arch].append(plt.bar(ind, times, width, color=color, bottom=bottom)) 302 bottom += times 303 ind += 0.3 304 305 plt.xlabel('Number of Dof') 306 plt.ylabel('Time (s)') 307 plt.title('GPU vs. CPU Performance on '+library+' Example '+str(num)) 308 plt.xticks(np.arange(N), map(str, sizes[arches[0]])) 309 #plt.yticks(np.arange(0,81,10)) 310 #plt.legend( (p1[0], p2[0]), ('Men', 'Women') ) 311 plt.legend([bar[0] for bar in bars[arches[0]]], eventNames, 'upper right', shadow = True) 312 313 plt.show() 314 return 315 316def getDMComplexSize(dim, out): 317 '''Retrieves the number of cells from ''' 318 size = 0 319 for line in out.split('\n'): 320 if line.strip().startswith(str(dim)+'-cells: '): 321 size = int(line.strip()[9:]) 322 break 323 return size 324 325def run_DMDA(ex, name, opts, args, sizes, times, events, log=True): 326 for n in map(int, args.size): 327 ex.run(log=log, da_grid_x=n, da_grid_y=n, **opts) 328 sizes[name].append(n*n * args.comp) 329 processSummary('summary', args.stage, args.events, times[name], events[name]) 330 return 331 332def run_DMComplex(ex, name, opts, args, sizes, times, events, log=True): 333 # This should eventually be replaced by a direct FFC/Ignition interface 334 if args.operator == 'laplacian': 335 numComp = 1 336 elif args.operator == 'elasticity': 337 numComp = args.dim 338 else: 339 raise RuntimeError('Unknown operator: %s' % args.operator) 340 341 for numBlock in [2**i for i in map(int, args.blockExp)]: 342 opts['gpu_blocks'] = numBlock 343 # Generate new block size 344 cmd = './bin/pythonscripts/PetscGenerateFEMQuadrature.py %d %d %d %d %s %s.h' % (args.dim, args.order, numComp, numBlock, args.operator, os.path.splitext(source[0])[0]) 345 print(cmd) 346 ret = os.system('python '+cmd) 347 args.files = ['['+','.join(source)+']'] 348 buildExample(args) 349 sizes[name][numBlock] = [] 350 times[name][numBlock] = [] 351 events[name][numBlock] = {} 352 for r in map(float, args.refine): 353 out = ex.run(log=log, refinement_limit=r, **opts) 354 sizes[name][numBlock].append(getDMComplexSize(args.dim, out)) 355 processSummary('summary', args.stage, args.events, times[name][numBlock], events[name][numBlock]) 356 return 357 358def outputData(sizes, times, events, name = 'output.py'): 359 if os.path.exists(name): 360 base, ext = os.path.splitext(name) 361 num = 1 362 while os.path.exists(base+str(num)+ext): 363 num += 1 364 name = base+str(num)+ext 365 with file(name, 'w') as f: 366 f.write('#PETSC_ARCH='+os.environ['PETSC_ARCH']+' '+' '.join(sys.argv)+'\n') 367 f.write('sizes = '+repr(sizes)+'\n') 368 f.write('times = '+repr(times)+'\n') 369 f.write('events = '+repr(events)+'\n') 370 return 371 372if __name__ == '__main__': 373 import argparse 374 375 parser = argparse.ArgumentParser(description = 'PETSc Benchmarking', 376 epilog = 'This script runs src/<library>/examples/tutorials/ex<num>, For more information, visit http://www.mcs.anl.gov/petsc', 377 formatter_class = argparse.ArgumentDefaultsHelpFormatter) 378 parser.add_argument('--library', default='SNES', help='The PETSc library used in this example') 379 parser.add_argument('--num', type = int, default='5', help='The example number') 380 parser.add_argument('--module', default='summary', help='The module for timing output') 381 parser.add_argument('--stage', default='Main_Stage', help='The default logging stage') 382 parser.add_argument('--events', nargs='+', help='Events to process') 383 parser.add_argument('--batch', action='store_true', default=False, help='Generate batch files for the runs instead') 384 parser.add_argument('--daemon', action='store_true', default=False, help='Run as a daemon') 385 subparsers = parser.add_subparsers(help='DM types') 386 387 parser_dmda = subparsers.add_parser('DMDA', help='Use a DMDA for the problem geometry') 388 parser_dmda.add_argument('--size', nargs='+', default=['10'], help='Grid size (implementation dependent)') 389 parser_dmda.add_argument('--comp', type = int, default='1', help='Number of field components') 390 parser_dmda.add_argument('runs', nargs='*', help='Run descriptions: <name>=<args>') 391 392 parser_dmmesh = subparsers.add_parser('DMComplex', help='Use a DMComplex for the problem geometry') 393 parser_dmmesh.add_argument('--dim', type = int, default='2', help='Spatial dimension') 394 parser_dmmesh.add_argument('--refine', nargs='+', default=['0.0'], help='List of refinement limits') 395 parser_dmmesh.add_argument('--order', type = int, default='1', help='Order of the finite element') 396 parser_dmmesh.add_argument('--operator', default='laplacian', help='The operator name') 397 parser_dmmesh.add_argument('--blockExp', nargs='+', default=range(0, 5), help='List of block exponents j, block size is 2^j') 398 parser_dmmesh.add_argument('runs', nargs='*', help='Run descriptions: <name>=<args>') 399 400 args = parser.parse_args() 401 print(args) 402 if hasattr(args, 'comp'): 403 args.dmType = 'DMDA' 404 else: 405 args.dmType = 'DMComplex' 406 407 ex = PETScExample(args.library, args.num, log_summary='summary.dat', log_summary_python = None if args.batch else args.module+'.py', preload='off') 408 source = ex.petsc.source(args.library, args.num) 409 sizes = {} 410 times = {} 411 events = {} 412 log = not args.daemon 413 414 if args.daemon: 415 import daemon 416 print 'Starting daemon' 417 daemon.createDaemon('.') 418 419 for run in args.runs: 420 name, stropts = run.split('=', 1) 421 opts = dict([t if len(t) == 2 else (t[0], None) for t in [arg.split('=', 1) for arg in stropts.split(' ')]]) 422 if args.dmType == 'DMDA': 423 sizes[name] = [] 424 times[name] = [] 425 events[name] = {} 426 run_DMDA(ex, name, opts, args, sizes, times, events, log=log) 427 elif args.dmType == 'DMComplex': 428 sizes[name] = {} 429 times[name] = {} 430 events[name] = {} 431 run_DMComplex(ex, name, opts, args, sizes, times, events, log=log) 432 outputData(sizes, times, events) 433 if not args.batch and log: plotSummaryLine(args.library, args.num, args.events, sizes, times, events) 434# Benchmark for ex50 435# ./src/benchmarks/benchmarkExample.py --events VecMDot VecMAXPY KSPGMRESOrthog MatMult VecCUSPCopyTo VecCUSPCopyFrom MatCUSPCopyTo --num 50 DMDA --size 10 20 50 100 --comp 4 CPU='pc_type=none mat_no_inode dm_vec_type=seq dm_mat_type=seqaij' GPU='pc_type=none mat_no_inode dm_vec_type=seqcusp dm_mat_type=seqaijcusp cusp_synchronize' 436# Benchmark for ex52 437# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dm_view show_residual=0 compute_function batch' GPU='dm_view show_residual=0 compute_function batch gpu gpu_batches=8' 438# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 --operator=elasticity CPU='dm_view op_type=elasticity show_residual=0 compute_function batch' GPU='dm_view op_type=elasticity show_residual=0 compute_function batch gpu gpu_batches=8' 439# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --dim=3 --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dim=3 dm_view show_residual=0 compute_function batch' GPU='dim=3 dm_view show_residual=0 compute_function batch gpu gpu_batches=8' 440