1#!/usr/bin/env python 2from __future__ import print_function 3import os,sys 4sys.path.append(os.path.join(os.environ['PETSC_DIR'], 'config')) 5sys.path.append(os.getcwd()) 6from builder2 import buildExample 7from benchmarkBatch import generateBatchScript 8 9class PETSc(object): 10 def __init__(self): 11 return 12 13 def dir(self): 14 '''Return the root directory for the PETSc tree (usually $PETSC_DIR)''' 15 # This should search for a valid PETSc 16 return os.environ['PETSC_DIR'] 17 18 def arch(self): 19 '''Return the PETSc build label (usually $PETSC_ARCH)''' 20 # This should be configurable 21 return os.environ['PETSC_ARCH'] 22 23 def mpiexec(self): 24 '''Return the path for the mpi launch executable''' 25 mpiexec = os.path.join(self.dir(), self.arch(), 'bin', 'mpiexec') 26 if not os.path.isfile(mpiexec): 27 return None 28 return mpiexec 29 30 def example(self, library, num): 31 '''Return the path to the executable for a given example number''' 32 return os.path.join(self.dir(), self.arch(), 'lib', library.lower(), 'ex'+str(num)) 33 34 def source(self, library, num, filenametail): 35 '''Return the path to the sources for a given example number''' 36 d = os.path.join(self.dir(), 'src', library.lower(), 'examples', 'tutorials') 37 name = 'ex'+str(num) 38 sources = [] 39 for f in os.listdir(d): 40 if f == name+'.c': 41 sources.insert(0, f) 42 elif f.startswith(name) and f.endswith(filenametail): 43 sources.append(f) 44 return map(lambda f: os.path.join(d, f), sources) 45 46class PETScExample(object): 47 def __init__(self, library, num, **defaultOptions): 48 self.petsc = PETSc() 49 self.library = library 50 self.num = num 51 self.opts = defaultOptions 52 return 53 54 @staticmethod 55 def runShellCommand(command, cwd = None, log = True): 56 import subprocess 57 58 Popen = subprocess.Popen 59 PIPE = subprocess.PIPE 60 if log: print('Executing: %s\n' % (command,)) 61 pipe = Popen(command, cwd=cwd, stdin=None, stdout=PIPE, stderr=PIPE, bufsize=-1, shell=True, universal_newlines=True) 62 (out, err) = pipe.communicate() 63 ret = pipe.returncode 64 return (out, err, ret) 65 66 def optionsToString(self, **opts): 67 '''Convert a dictionary of options to a command line argument string''' 68 a = [] 69 for key,value in opts.iteritems(): 70 if value is None: 71 a.append('-'+key) 72 else: 73 a.append('-'+key+' '+str(value)) 74 return ' '.join(a) 75 76 def build(self, log = True): 77 sdir = os.path.join(self.petsc.dir(), 'src', self.library.lower(), 'examples', 'tutorials') 78 odir = os.getcwd() 79 os.chdir(sdir) 80 cmd = 'make ex'+str(self.num) 81 out, err, ret = self.runShellCommand(cmd, cwd = sdir, log = log) 82 if err: 83 raise RuntimeError('Unable to build example:\n'+err+out) 84 os.chdir(odir) 85 bdir = os.path.dirname(self.petsc.example(self.library, self.num)) 86 try: 87 os.makedirs(bdir) 88 except OSError: 89 if not os.path.isdir(bdir): 90 raise 91 cmd = 'mv '+os.path.join(sdir, 'ex'+str(self.num))+' '+self.petsc.example(self.library, self.num) 92 out, err, ret = self.runShellCommand(cmd, log = log) 93 if ret: 94 print(err) 95 print(out) 96 return 97 98 def run(self, numProcs = 1, log = True, **opts): 99 cmd = '' 100 if self.petsc.mpiexec() is not None: 101 cmd += self.petsc.mpiexec() + ' ' 102 numProcs = os.environ.get('NUM_RANKS', numProcs) 103 cmd += ' -n ' + str(numProcs) + ' ' 104 if 'PE_HOSTFILE' in os.environ: 105 cmd += ' -hostfile hostfile ' 106 cmd += ' '.join([self.petsc.example(self.library, self.num), self.optionsToString(**self.opts), self.optionsToString(**opts)]) 107 if 'batch' in opts and opts['batch']: 108 del opts['batch'] 109 filename = generateBatchScript(self.num, numProcs, 120, ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts)) 110 # Submit job 111 out, err, ret = self.runShellCommand('qsub -q gpu '+filename, log = log) 112 if ret: 113 print(err) 114 print(out) 115 else: 116 out, err, ret = self.runShellCommand(cmd, log = log) 117 if ret: 118 print(err) 119 print(out) 120 return out 121 122def processSummary(moduleName, defaultStage, eventNames, times, events): 123 '''Process the Python log summary into plot data''' 124 m = __import__(moduleName) 125 reload(m) 126 # Total Time 127 times.append(m.Time[0]) 128 # Particular events 129 for name in eventNames: 130 if name.find(':') >= 0: 131 stageName, name = name.split(':', 1) 132 stage = getattr(m, stageName) 133 else: 134 stage = getattr(m, defaultStage) 135 if name in stage.event: 136 if not name in events: 137 events[name] = [] 138 try: 139 events[name].append((max(stage.event[name].Time), sum(stage.event[name].Flops)/(max(stage.event[name].Time) * 1e6))) 140 except ZeroDivisionError: 141 events[name].append((max(stage.event[name].Time), 0)) 142 return 143 144def plotTime(library, num, eventNames, sizes, times, events): 145 from pylab import legend, plot, show, title, xlabel, ylabel 146 import numpy as np 147 148 arches = sizes.keys() 149 data = [] 150 for arch in arches: 151 data.append(sizes[arch]) 152 data.append(times[arch]) 153 plot(*data) 154 title('Performance on '+library+' Example '+str(num)) 155 xlabel('Number of Dof') 156 ylabel('Time (s)') 157 legend(arches, 'upper left', shadow = True) 158 show() 159 return 160 161def plotEventTime(library, num, eventNames, sizes, times, events, filename = None): 162 from pylab import close, legend, plot, savefig, show, title, xlabel, ylabel 163 import numpy as np 164 165 close() 166 arches = sizes.keys() 167 bs = events[arches[0]].keys()[0] 168 data = [] 169 names = [] 170 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 171 for arch, style in zip(arches, ['-', ':']): 172 if event in events[arch][bs]: 173 names.append(arch+'-'+str(bs)+' '+event) 174 data.append(sizes[arch][bs]) 175 data.append(np.array(events[arch][bs][event])[:,0]) 176 data.append(color+style) 177 else: 178 print('Could not find %s in %s-%d events' % (event, arch, bs)) 179 print(data) 180 plot(*data) 181 title('Performance on '+library+' Example '+str(num)) 182 xlabel('Number of Dof') 183 ylabel('Time (s)') 184 legend(names, 'upper left', shadow = True) 185 if filename is None: 186 show() 187 else: 188 savefig(filename) 189 return 190 191def plotEventFlop(library, num, eventNames, sizes, times, events, filename = None): 192 from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel 193 import numpy as np 194 195 arches = sizes.keys() 196 bs = events[arches[0]].keys()[0] 197 data = [] 198 names = [] 199 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 200 for arch, style in zip(arches, ['-', ':']): 201 if event in events[arch][bs]: 202 names.append(arch+'-'+str(bs)+' '+event) 203 data.append(sizes[arch][bs]) 204 data.append(1e-3*np.array(events[arch][bs][event])[:,1]) 205 data.append(color+style) 206 else: 207 print('Could not find %s in %s-%d events' % (event, arch, bs)) 208 semilogy(*data) 209 title('Performance on '+library+' Example '+str(num)) 210 xlabel('Number of Dof') 211 ylabel('Computation Rate (GF/s)') 212 legend(names, 'upper left', shadow = True) 213 if filename is None: 214 show() 215 else: 216 savefig(filename) 217 return 218 219def plotEventScaling(library, num, eventNames, procs, events, filename = None): 220 from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel 221 import numpy as np 222 223 arches = procs.keys() 224 bs = events[arches[0]].keys()[0] 225 data = [] 226 names = [] 227 for arch, style in zip(arches, ['-', ':']): 228 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 229 if event in events[arch][bs]: 230 names.append(arch+'-'+str(bs)+' '+event) 231 data.append(procs[arch][bs]) 232 data.append(1e-3*np.array(events[arch][bs][event])[:,1]) 233 data.append(color+style) 234 else: 235 print('Could not find %s in %s-%d events' % (event, arch, bs)) 236 plot(*data) 237 title('Performance on '+library+' Example '+str(num)) 238 xlabel('Number of Processors') 239 ylabel('Computation Rate (GF/s)') 240 legend(names, 'upper left', shadow = True) 241 if filename is None: 242 show() 243 else: 244 savefig(filename) 245 return 246 247def plotSummaryLine(library, num, eventNames, sizes, times, events): 248 from pylab import legend, plot, show, title, xlabel, ylabel 249 import numpy as np 250 showTime = False 251 showEventTime = True 252 showEventFlops = True 253 arches = sizes.keys() 254 # Time 255 if showTime: 256 data = [] 257 for arch in arches: 258 data.append(sizes[arch]) 259 data.append(times[arch]) 260 plot(*data) 261 title('Performance on '+library+' Example '+str(num)) 262 xlabel('Number of Dof') 263 ylabel('Time (s)') 264 legend(arches, 'upper left', shadow = True) 265 show() 266 # Common event time 267 # We could make a stacked plot like Rio uses here 268 if showEventTime: 269 bs = events[arches[0]].keys()[0] 270 data = [] 271 names = [] 272 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 273 for arch, style in zip(arches, ['-', ':']): 274 if event in events[arch][bs]: 275 names.append(arch+'-'+str(bs)+' '+event) 276 data.append(sizes[arch][bs]) 277 data.append(np.array(events[arch][bs][event])[:,0]) 278 data.append(color+style) 279 else: 280 print('Could not find %s in %s-%d events' % (event, arch, bs)) 281 print(data) 282 plot(*data) 283 title('Performance on '+library+' Example '+str(num)) 284 xlabel('Number of Dof') 285 ylabel('Time (s)') 286 legend(names, 'upper left', shadow = True) 287 show() 288 # Common event flops 289 # We could make a stacked plot like Rio uses here 290 if showEventFlops: 291 bs = events[arches[0]].keys()[0] 292 data = [] 293 names = [] 294 for event, color in zip(eventNames, ['b', 'g', 'r', 'y']): 295 for arch, style in zip(arches, ['-', ':']): 296 if event in events[arch][bs]: 297 names.append(arch+'-'+str(bs)+' '+event) 298 data.append(sizes[arch][bs]) 299 data.append(np.array(events[arch][bs][event])[:,1]) 300 data.append(color+style) 301 else: 302 print('Could not find %s in %s-%d events' % (event, arch, bs)) 303 plot(*data) 304 title('Performance on '+library+' Example '+str(num)) 305 xlabel('Number of Dof') 306 ylabel('Computation Rate (MF/s)') 307 legend(names, 'upper left', shadow = True) 308 show() 309 return 310 311def plotSummaryBar(library, num, eventNames, sizes, times, events): 312 import numpy as np 313 import matplotlib.pyplot as plt 314 315 eventColors = ['b', 'g', 'r', 'y'] 316 arches = sizes.keys() 317 names = [] 318 N = len(sizes[arches[0]]) 319 width = 0.2 320 ind = np.arange(N) - 0.25 321 bars = {} 322 for arch in arches: 323 bars[arch] = [] 324 bottom = np.zeros(N) 325 for event, color in zip(eventNames, eventColors): 326 names.append(arch+' '+event) 327 times = np.array(events[arch][event])[:,0] 328 bars[arch].append(plt.bar(ind, times, width, color=color, bottom=bottom)) 329 bottom += times 330 ind += 0.3 331 332 plt.xlabel('Number of Dof') 333 plt.ylabel('Time (s)') 334 plt.title('GPU vs. CPU Performance on '+library+' Example '+str(num)) 335 plt.xticks(np.arange(N), map(str, sizes[arches[0]])) 336 #plt.yticks(np.arange(0,81,10)) 337 #plt.legend( (p1[0], p2[0]), ('Men', 'Women') ) 338 plt.legend([bar[0] for bar in bars[arches[0]]], eventNames, 'upper right', shadow = True) 339 340 plt.show() 341 return 342 343def getDMComplexSize(dim, out): 344 '''Retrieves the number of cells from -dm_view output''' 345 size = 0 346 for line in out.split('\n'): 347 if line.strip().startswith(str(dim)+'-cells: '): 348 sizes = line.strip()[9:].split() 349 size = sum(map(int, sizes)) 350 break 351 return size 352 353def run_DMDA(ex, name, opts, args, sizes, times, events, log=True): 354 for n in map(int, args.size): 355 ex.run(log=log, da_grid_x=n, da_grid_y=n, **opts) 356 sizes[name].append(n*n * args.comp) 357 processSummary('summary', args.stage, args.events, times[name], events[name]) 358 return 359 360def run_DMComplex(ex, name, opts, args, sizes, times, events, log=True): 361 # This should eventually be replaced by a direct FFC/Ignition interface 362 if args.operator == 'laplacian': 363 numComp = 1 364 elif args.operator == 'elasticity': 365 numComp = args.dim 366 else: 367 raise RuntimeError('Unknown operator: %s' % args.operator) 368 369 for numBlock in [2**i for i in map(int, args.blockExp)]: 370 opts['gpu_blocks'] = numBlock 371 args.files = ['['+','.join(source)+']'] 372 buildExample(args) 373 sizes[name][numBlock] = [] 374 times[name][numBlock] = [] 375 events[name][numBlock] = {} 376 for r in map(float, args.refine): 377 out = ex.run(log=log, refinement_limit=r, **opts) 378 sizes[name][numBlock].append(getDMComplexSize(args.dim, out)) 379 processSummary('summary', args.stage, args.events, times[name][numBlock], events[name][numBlock]) 380 return 381 382def outputData(sizes, times, events, name = 'output.py'): 383 if os.path.exists(name): 384 base, ext = os.path.splitext(name) 385 num = 1 386 while os.path.exists(base+str(num)+ext): 387 num += 1 388 name = base+str(num)+ext 389 with file(name, 'w') as f: 390 f.write('#PETSC_ARCH='+os.environ['PETSC_ARCH']+' '+' '.join(sys.argv)+'\n') 391 f.write('sizes = '+repr(sizes)+'\n') 392 f.write('times = '+repr(times)+'\n') 393 f.write('events = '+repr(events)+'\n') 394 return 395 396if __name__ == '__main__': 397 import argparse 398 399 parser = argparse.ArgumentParser(description = 'PETSc Benchmarking', 400 epilog = 'This script runs src/<library>/examples/tutorials/ex<num>, For more information, visit http://www.mcs.anl.gov/petsc', 401 formatter_class = argparse.ArgumentDefaultsHelpFormatter) 402 parser.add_argument('--library', default='SNES', help='The PETSc library used in this example') 403 parser.add_argument('--num', type = int, default='5', help='The example number') 404 parser.add_argument('--module', default='summary', help='The module for timing output') 405 parser.add_argument('--stage', default='Main_Stage', help='The default logging stage') 406 parser.add_argument('--events', nargs='+', help='Events to process') 407 parser.add_argument('--batch', action='store_true', default=False, help='Generate batch files for the runs instead') 408 parser.add_argument('--daemon', action='store_true', default=False, help='Run as a daemon') 409 parser.add_argument('--gpulang', default='OpenCL', help='GPU Language to use: Either CUDA or OpenCL (default)') 410 subparsers = parser.add_subparsers(help='DM types') 411 412 parser_dmda = subparsers.add_parser('DMDA', help='Use a DMDA for the problem geometry') 413 parser_dmda.add_argument('--size', nargs='+', default=['10'], help='Grid size (implementation dependent)') 414 parser_dmda.add_argument('--comp', type = int, default='1', help='Number of field components') 415 parser_dmda.add_argument('runs', nargs='*', help='Run descriptions: <name>=<args>') 416 417 parser_dmmesh = subparsers.add_parser('DMComplex', help='Use a DMComplex for the problem geometry') 418 parser_dmmesh.add_argument('--dim', type = int, default='2', help='Spatial dimension') 419 parser_dmmesh.add_argument('--refine', nargs='+', default=['0.0'], help='List of refinement limits') 420 parser_dmmesh.add_argument('--order', type = int, default='1', help='Order of the finite element') 421 parser_dmmesh.add_argument('--operator', default='laplacian', help='The operator name') 422 parser_dmmesh.add_argument('--blockExp', nargs='+', default=range(0, 5), help='List of block exponents j, block size is 2^j') 423 parser_dmmesh.add_argument('runs', nargs='*', help='Run descriptions: <name>=<args>') 424 425 args = parser.parse_args() 426 print(args) 427 if hasattr(args, 'comp'): 428 args.dmType = 'DMDA' 429 else: 430 args.dmType = 'DMComplex' 431 432 ex = PETScExample(args.library, args.num, log_summary='summary.dat', log_summary_python = None if args.batch else args.module+'.py', preload='off') 433 if args.gpulang == 'CUDA': 434 source = ex.petsc.source(args.library, args.num, '.cu') 435 else: 436 source = ex.petsc.source(args.library, args.num, 'OpenCL.c') # Using the convention of OpenCL code residing in source files ending in 'OpenCL.c' (at least for snes/ex52) 437 sizes = {} 438 times = {} 439 events = {} 440 log = not args.daemon 441 442 if args.daemon: 443 import daemon 444 print('Starting daemon') 445 daemon.createDaemon('.') 446 447 for run in args.runs: 448 name, stropts = run.split('=', 1) 449 opts = dict([t if len(t) == 2 else (t[0], None) for t in [arg.split('=', 1) for arg in stropts.split(' ')]]) 450 if args.dmType == 'DMDA': 451 sizes[name] = [] 452 times[name] = [] 453 events[name] = {} 454 run_DMDA(ex, name, opts, args, sizes, times, events, log=log) 455 elif args.dmType == 'DMComplex': 456 sizes[name] = {} 457 times[name] = {} 458 events[name] = {} 459 run_DMComplex(ex, name, opts, args, sizes, times, events, log=log) 460 outputData(sizes, times, events) 461 if not args.batch and log: plotSummaryLine(args.library, args.num, args.events, sizes, times, events) 462# Benchmark for ex50 463# ./src/benchmarks/benchmarkExample.py --events VecMDot VecMAXPY KSPGMRESOrthog MatMult VecCUSPCopyTo VecCUSPCopyFrom MatCUSPCopyTo --num 50 DMDA --size 10 20 50 100 --comp 4 CPU='pc_type=none mat_no_inode dm_vec_type=seq dm_mat_type=seqaij' GPU='pc_type=none mat_no_inode dm_vec_type=seqcusp dm_mat_type=seqaijcusp cusp_synchronize' 464# Benchmark for ex52 465# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dm_view show_residual=0 compute_function batch' GPU='dm_view show_residual=0 compute_function batch gpu gpu_batches=8' 466# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 --operator=elasticity CPU='dm_view op_type=elasticity show_residual=0 compute_function batch' GPU='dm_view op_type=elasticity show_residual=0 compute_function batch gpu gpu_batches=8' 467# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --dim=3 --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dim=3 dm_view show_residual=0 compute_function batch' GPU='dim=3 dm_view show_residual=0 compute_function batch gpu gpu_batches=8' 468