xref: /petsc/src/benchmarks/benchmarkExample.py (revision 34e9cff27d07519c2d99d1c0a9fe31a7bc1659d3)
1#!/usr/bin/env python
2import os,sys
3sys.path.append(os.path.join(os.environ['PETSC_DIR'], 'config'))
4sys.path.append(os.getcwd())
5from builder2 import buildExample
6from benchmarkBatch import generateBatchScript
7
8class PETSc(object):
9  def __init__(self):
10    return
11
12  def dir(self):
13    '''Return the root directory for the PETSc tree (usually $PETSC_DIR)'''
14    # This should search for a valid PETSc
15    return os.environ['PETSC_DIR']
16
17  def arch(self):
18    '''Return the PETSc build label (usually $PETSC_ARCH)'''
19    # This should be configurable
20    return os.environ['PETSC_ARCH']
21
22  def mpiexec(self):
23    '''Return the path for the mpi launch executable'''
24    mpiexec = os.path.join(self.dir(), self.arch(), 'bin', 'mpiexec')
25    if not os.path.isfile(mpiexec):
26      return None
27    return mpiexec
28
29  def example(self, library, num):
30    '''Return the path to the executable for a given example number'''
31    return os.path.join(self.dir(), self.arch(), 'lib', library.lower(), 'ex'+str(num))
32
33  def source(self, library, num, filenametail):
34    '''Return the path to the sources for a given example number'''
35    d = os.path.join(self.dir(), 'src', library.lower(), 'examples', 'tutorials')
36    name = 'ex'+str(num)
37    sources = []
38    for f in os.listdir(d):
39      if f == name+'.c':
40        sources.insert(0, f)
41      elif f.startswith(name) and f.endswith(filenametail):
42        sources.append(f)
43    return map(lambda f: os.path.join(d, f), sources)
44
45class PETScExample(object):
46  def __init__(self, library, num, **defaultOptions):
47    self.petsc   = PETSc()
48    self.library = library
49    self.num     = num
50    self.opts    = defaultOptions
51    return
52
53  @staticmethod
54  def runShellCommand(command, cwd = None, log = True):
55    import subprocess
56
57    Popen = subprocess.Popen
58    PIPE  = subprocess.PIPE
59    if log: print 'Executing: %s\n' % (command,)
60    pipe = Popen(command, cwd=cwd, stdin=None, stdout=PIPE, stderr=PIPE, bufsize=-1, shell=True, universal_newlines=True)
61    (out, err) = pipe.communicate()
62    ret = pipe.returncode
63    return (out, err, ret)
64
65  def optionsToString(self, **opts):
66    '''Convert a dictionary of options to a command line argument string'''
67    a = []
68    for key,value in opts.iteritems():
69      if value is None:
70        a.append('-'+key)
71      else:
72        a.append('-'+key+' '+str(value))
73    return ' '.join(a)
74
75  def build(self, log = True):
76    sdir = os.path.join(self.petsc.dir(), 'src', self.library.lower(), 'examples', 'tutorials')
77    odir = os.getcwd()
78    os.chdir(sdir)
79    cmd = 'make ex'+str(self.num)
80    out, err, ret = self.runShellCommand(cmd, cwd = sdir, log = log)
81    if err:
82      raise RuntimeError('Unable to build example:\n'+err+out)
83    os.chdir(odir)
84    bdir = os.path.dirname(self.petsc.example(self.library, self.num))
85    try:
86      os.makedirs(bdir)
87    except OSError:
88      if not os.path.isdir(bdir):
89        raise
90    cmd = 'mv '+os.path.join(sdir, 'ex'+str(self.num))+' '+self.petsc.example(self.library, self.num)
91    out, err, ret = self.runShellCommand(cmd, log = log)
92    if ret:
93      print err
94      print out
95    return
96
97  def run(self, numProcs = 1, log = True, **opts):
98    cmd = ''
99    if self.petsc.mpiexec() is not None:
100      cmd += self.petsc.mpiexec() + ' '
101      numProcs = os.environ.get('NUM_RANKS', numProcs)
102      cmd += ' -n ' + str(numProcs) + ' '
103      if os.environ.has_key('PE_HOSTFILE'):
104        cmd += ' -hostfile hostfile '
105    cmd += ' '.join([self.petsc.example(self.library, self.num), self.optionsToString(**self.opts), self.optionsToString(**opts)])
106    if 'batch' in opts and opts['batch']:
107      del opts['batch']
108      filename = generateBatchScript(self.num, numProcs, 120, ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts))
109      # Submit job
110      out, err, ret = self.runShellCommand('qsub -q gpu '+filename, log = log)
111      if ret:
112        print err
113        print out
114    else:
115      out, err, ret = self.runShellCommand(cmd, log = log)
116      if ret:
117        print err
118        print out
119    return out
120
121def processSummary(moduleName, defaultStage, eventNames, times, events):
122  '''Process the Python log summary into plot data'''
123  m = __import__(moduleName)
124  reload(m)
125  # Total Time
126  times.append(m.Time[0])
127  # Particular events
128  for name in eventNames:
129    if name.find(':') >= 0:
130      stageName, name = name.split(':', 1)
131      stage = getattr(m, stageName)
132    else:
133      stage = getattr(m, defaultStage)
134    if name in stage.event:
135      if not name in events:
136        events[name] = []
137      try:
138        events[name].append((max(stage.event[name].Time), sum(stage.event[name].Flops)/(max(stage.event[name].Time) * 1e6)))
139      except ZeroDivisionError:
140        events[name].append((max(stage.event[name].Time), 0))
141  return
142
143def plotTime(library, num, eventNames, sizes, times, events):
144  from pylab import legend, plot, show, title, xlabel, ylabel
145  import numpy as np
146
147  arches = sizes.keys()
148  data   = []
149  for arch in arches:
150    data.append(sizes[arch])
151    data.append(times[arch])
152  plot(*data)
153  title('Performance on '+library+' Example '+str(num))
154  xlabel('Number of Dof')
155  ylabel('Time (s)')
156  legend(arches, 'upper left', shadow = True)
157  show()
158  return
159
160def plotEventTime(library, num, eventNames, sizes, times, events, filename = None):
161  from pylab import close, legend, plot, savefig, show, title, xlabel, ylabel
162  import numpy as np
163
164  close()
165  arches = sizes.keys()
166  bs     = events[arches[0]].keys()[0]
167  data   = []
168  names  = []
169  for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
170    for arch, style in zip(arches, ['-', ':']):
171      if event in events[arch][bs]:
172        names.append(arch+'-'+str(bs)+' '+event)
173        data.append(sizes[arch][bs])
174        data.append(np.array(events[arch][bs][event])[:,0])
175        data.append(color+style)
176      else:
177        print 'Could not find %s in %s-%d events' % (event, arch, bs)
178  print data
179  plot(*data)
180  title('Performance on '+library+' Example '+str(num))
181  xlabel('Number of Dof')
182  ylabel('Time (s)')
183  legend(names, 'upper left', shadow = True)
184  if filename is None:
185    show()
186  else:
187    savefig(filename)
188  return
189
190def plotEventFlop(library, num, eventNames, sizes, times, events, filename = None):
191  from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel
192  import numpy as np
193
194  arches = sizes.keys()
195  bs     = events[arches[0]].keys()[0]
196  data   = []
197  names  = []
198  for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
199    for arch, style in zip(arches, ['-', ':']):
200      if event in events[arch][bs]:
201        names.append(arch+'-'+str(bs)+' '+event)
202        data.append(sizes[arch][bs])
203        data.append(1e-3*np.array(events[arch][bs][event])[:,1])
204        data.append(color+style)
205      else:
206        print 'Could not find %s in %s-%d events' % (event, arch, bs)
207  semilogy(*data)
208  title('Performance on '+library+' Example '+str(num))
209  xlabel('Number of Dof')
210  ylabel('Computation Rate (GF/s)')
211  legend(names, 'upper left', shadow = True)
212  if filename is None:
213    show()
214  else:
215    savefig(filename)
216  return
217
218def plotEventScaling(library, num, eventNames, procs, events, filename = None):
219  from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel
220  import numpy as np
221
222  arches = procs.keys()
223  bs     = events[arches[0]].keys()[0]
224  data   = []
225  names  = []
226  for arch, style in zip(arches, ['-', ':']):
227    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
228      if event in events[arch][bs]:
229        names.append(arch+'-'+str(bs)+' '+event)
230        data.append(procs[arch][bs])
231        data.append(1e-3*np.array(events[arch][bs][event])[:,1])
232        data.append(color+style)
233      else:
234        print 'Could not find %s in %s-%d events' % (event, arch, bs)
235  plot(*data)
236  title('Performance on '+library+' Example '+str(num))
237  xlabel('Number of Processors')
238  ylabel('Computation Rate (GF/s)')
239  legend(names, 'upper left', shadow = True)
240  if filename is None:
241    show()
242  else:
243    savefig(filename)
244  return
245
246def plotSummaryLine(library, num, eventNames, sizes, times, events):
247  from pylab import legend, plot, show, title, xlabel, ylabel
248  import numpy as np
249  showTime       = False
250  showEventTime  = True
251  showEventFlops = True
252  arches         = sizes.keys()
253  # Time
254  if showTime:
255    data = []
256    for arch in arches:
257      data.append(sizes[arch])
258      data.append(times[arch])
259    plot(*data)
260    title('Performance on '+library+' Example '+str(num))
261    xlabel('Number of Dof')
262    ylabel('Time (s)')
263    legend(arches, 'upper left', shadow = True)
264    show()
265  # Common event time
266  #   We could make a stacked plot like Rio uses here
267  if showEventTime:
268    bs    = events[arches[0]].keys()[0]
269    data  = []
270    names = []
271    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
272      for arch, style in zip(arches, ['-', ':']):
273        if event in events[arch][bs]:
274          names.append(arch+'-'+str(bs)+' '+event)
275          data.append(sizes[arch][bs])
276          data.append(np.array(events[arch][bs][event])[:,0])
277          data.append(color+style)
278        else:
279          print 'Could not find %s in %s-%d events' % (event, arch, bs)
280    print data
281    plot(*data)
282    title('Performance on '+library+' Example '+str(num))
283    xlabel('Number of Dof')
284    ylabel('Time (s)')
285    legend(names, 'upper left', shadow = True)
286    show()
287  # Common event flops
288  #   We could make a stacked plot like Rio uses here
289  if showEventFlops:
290    bs    = events[arches[0]].keys()[0]
291    data  = []
292    names = []
293    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
294      for arch, style in zip(arches, ['-', ':']):
295        if event in events[arch][bs]:
296          names.append(arch+'-'+str(bs)+' '+event)
297          data.append(sizes[arch][bs])
298          data.append(np.array(events[arch][bs][event])[:,1])
299          data.append(color+style)
300        else:
301          print 'Could not find %s in %s-%d events' % (event, arch, bs)
302    plot(*data)
303    title('Performance on '+library+' Example '+str(num))
304    xlabel('Number of Dof')
305    ylabel('Computation Rate (MF/s)')
306    legend(names, 'upper left', shadow = True)
307    show()
308  return
309
310def plotSummaryBar(library, num, eventNames, sizes, times, events):
311  import numpy as np
312  import matplotlib.pyplot as plt
313
314  eventColors = ['b', 'g', 'r', 'y']
315  arches = sizes.keys()
316  names  = []
317  N      = len(sizes[arches[0]])
318  width  = 0.2
319  ind    = np.arange(N) - 0.25
320  bars   = {}
321  for arch in arches:
322    bars[arch] = []
323    bottom = np.zeros(N)
324    for event, color in zip(eventNames, eventColors):
325      names.append(arch+' '+event)
326      times = np.array(events[arch][event])[:,0]
327      bars[arch].append(plt.bar(ind, times, width, color=color, bottom=bottom))
328      bottom += times
329    ind += 0.3
330
331  plt.xlabel('Number of Dof')
332  plt.ylabel('Time (s)')
333  plt.title('GPU vs. CPU Performance on '+library+' Example '+str(num))
334  plt.xticks(np.arange(N), map(str, sizes[arches[0]]))
335  #plt.yticks(np.arange(0,81,10))
336  #plt.legend( (p1[0], p2[0]), ('Men', 'Women') )
337  plt.legend([bar[0] for bar in bars[arches[0]]], eventNames, 'upper right', shadow = True)
338
339  plt.show()
340  return
341
342def getDMComplexSize(dim, out):
343  '''Retrieves the number of cells from -dm_view output'''
344  size = 0
345  for line in out.split('\n'):
346    if line.strip().startswith(str(dim)+'-cells: '):
347      sizes = line.strip()[9:].split()
348      size  = sum(map(int, sizes))
349      break
350  return size
351
352def run_DMDA(ex, name, opts, args, sizes, times, events, log=True):
353  for n in map(int, args.size):
354    ex.run(log=log, da_grid_x=n, da_grid_y=n, **opts)
355    sizes[name].append(n*n * args.comp)
356    processSummary('summary', args.stage, args.events, times[name], events[name])
357  return
358
359def run_DMComplex(ex, name, opts, args, sizes, times, events, log=True):
360  # This should eventually be replaced by a direct FFC/Ignition interface
361  if args.operator == 'laplacian':
362    numComp  = 1
363  elif args.operator == 'elasticity':
364    numComp  = args.dim
365  else:
366    raise RuntimeError('Unknown operator: %s' % args.operator)
367
368  for numBlock in [2**i for i in map(int, args.blockExp)]:
369    opts['gpu_blocks'] = numBlock
370    args.files = ['['+','.join(source)+']']
371    buildExample(args)
372    sizes[name][numBlock]  = []
373    times[name][numBlock]  = []
374    events[name][numBlock] = {}
375    for r in map(float, args.refine):
376      out = ex.run(log=log, refinement_limit=r, **opts)
377      sizes[name][numBlock].append(getDMComplexSize(args.dim, out))
378      processSummary('summary', args.stage, args.events, times[name][numBlock], events[name][numBlock])
379  return
380
381def outputData(sizes, times, events, name = 'output.py'):
382  if os.path.exists(name):
383    base, ext = os.path.splitext(name)
384    num = 1
385    while os.path.exists(base+str(num)+ext):
386      num += 1
387    name = base+str(num)+ext
388  with file(name, 'w') as f:
389    f.write('#PETSC_ARCH='+os.environ['PETSC_ARCH']+' '+' '.join(sys.argv)+'\n')
390    f.write('sizes  = '+repr(sizes)+'\n')
391    f.write('times  = '+repr(times)+'\n')
392    f.write('events = '+repr(events)+'\n')
393  return
394
395if __name__ == '__main__':
396  import argparse
397
398  parser = argparse.ArgumentParser(description     = 'PETSc Benchmarking',
399                                   epilog          = 'This script runs src/<library>/examples/tutorials/ex<num>, For more information, visit http://www.mcs.anl.gov/petsc',
400                                   formatter_class = argparse.ArgumentDefaultsHelpFormatter)
401  parser.add_argument('--library', default='SNES',                     help='The PETSc library used in this example')
402  parser.add_argument('--num',     type = int, default='5',            help='The example number')
403  parser.add_argument('--module',  default='summary',                  help='The module for timing output')
404  parser.add_argument('--stage',   default='Main_Stage',               help='The default logging stage')
405  parser.add_argument('--events',  nargs='+',                          help='Events to process')
406  parser.add_argument('--batch',   action='store_true', default=False, help='Generate batch files for the runs instead')
407  parser.add_argument('--daemon',  action='store_true', default=False, help='Run as a daemon')
408  parser.add_argument('--gpulang', default='OpenCL',                   help='GPU Language to use: Either CUDA or OpenCL (default)')
409  subparsers = parser.add_subparsers(help='DM types')
410
411  parser_dmda = subparsers.add_parser('DMDA', help='Use a DMDA for the problem geometry')
412  parser_dmda.add_argument('--size', nargs='+',  default=['10'], help='Grid size (implementation dependent)')
413  parser_dmda.add_argument('--comp', type = int, default='1',    help='Number of field components')
414  parser_dmda.add_argument('runs',   nargs='*',                  help='Run descriptions: <name>=<args>')
415
416  parser_dmmesh = subparsers.add_parser('DMComplex', help='Use a DMComplex for the problem geometry')
417  parser_dmmesh.add_argument('--dim',      type = int, default='2',        help='Spatial dimension')
418  parser_dmmesh.add_argument('--refine',   nargs='+',  default=['0.0'],    help='List of refinement limits')
419  parser_dmmesh.add_argument('--order',    type = int, default='1',        help='Order of the finite element')
420  parser_dmmesh.add_argument('--operator', default='laplacian',            help='The operator name')
421  parser_dmmesh.add_argument('--blockExp', nargs='+', default=range(0, 5), help='List of block exponents j, block size is 2^j')
422  parser_dmmesh.add_argument('runs',       nargs='*',                      help='Run descriptions: <name>=<args>')
423
424  args = parser.parse_args()
425  print(args)
426  if hasattr(args, 'comp'):
427    args.dmType = 'DMDA'
428  else:
429    args.dmType = 'DMComplex'
430
431  ex     = PETScExample(args.library, args.num, log_summary='summary.dat', log_summary_python = None if args.batch else args.module+'.py', preload='off')
432  if args.gpulang == 'CUDA':
433    source = ex.petsc.source(args.library, args.num, '.cu')
434  else:
435    source = ex.petsc.source(args.library, args.num, 'OpenCL.c')  # Using the convention of OpenCL code residing in source files ending in 'OpenCL.c' (at least for snes/ex52)
436  sizes  = {}
437  times  = {}
438  events = {}
439  log    = not args.daemon
440
441  if args.daemon:
442    import daemon
443    print 'Starting daemon'
444    daemon.createDaemon('.')
445
446  for run in args.runs:
447    name, stropts = run.split('=', 1)
448    opts = dict([t if len(t) == 2 else (t[0], None) for t in [arg.split('=', 1) for arg in stropts.split(' ')]])
449    if args.dmType == 'DMDA':
450      sizes[name]  = []
451      times[name]  = []
452      events[name] = {}
453      run_DMDA(ex, name, opts, args, sizes, times, events, log=log)
454    elif args.dmType == 'DMComplex':
455      sizes[name]  = {}
456      times[name]  = {}
457      events[name] = {}
458      run_DMComplex(ex, name, opts, args, sizes, times, events, log=log)
459  outputData(sizes, times, events)
460  if not args.batch and log: plotSummaryLine(args.library, args.num, args.events, sizes, times, events)
461# Benchmark for ex50
462# ./src/benchmarks/benchmarkExample.py --events VecMDot VecMAXPY KSPGMRESOrthog MatMult VecCUSPCopyTo VecCUSPCopyFrom MatCUSPCopyTo --num 50 DMDA --size 10 20 50 100 --comp 4 CPU='pc_type=none mat_no_inode dm_vec_type=seq dm_mat_type=seqaij' GPU='pc_type=none mat_no_inode dm_vec_type=seqcusp dm_mat_type=seqaijcusp cusp_synchronize'
463# Benchmark for ex52
464# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dm_view show_residual=0 compute_function batch' GPU='dm_view show_residual=0 compute_function batch gpu gpu_batches=8'
465# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 --operator=elasticity CPU='dm_view op_type=elasticity show_residual=0 compute_function batch' GPU='dm_view op_type=elasticity show_residual=0 compute_function batch gpu gpu_batches=8'
466# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --dim=3 --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dim=3 dm_view show_residual=0 compute_function batch' GPU='dim=3 dm_view show_residual=0 compute_function batch gpu gpu_batches=8'
467