xref: /petsc/src/benchmarks/benchmarkExample.py (revision 929aa6beb2ae61c5d03a22a1e8b1306d8223d988)
1#!/usr/bin/env python
2import os,sys
3sys.path.append(os.path.join(os.environ['PETSC_DIR'], 'config'))
4from builder2 import buildExample
5from benchmarkBatch import generateBatchScript
6
7class PETSc(object):
8  def __init__(self):
9    return
10
11  def dir(self):
12    '''Return the root directory for the PETSc tree (usually $PETSC_DIR)'''
13    # This should search for a valid PETSc
14    return os.environ['PETSC_DIR']
15
16  def arch(self):
17    '''Return the PETSc build label (usually $PETSC_ARCH)'''
18    # This should be configurable
19    return os.environ['PETSC_ARCH']
20
21  def mpiexec(self):
22    '''Return the path for the mpi launch executable'''
23    mpiexec = os.path.join(self.dir(), self.arch(), 'bin', 'mpiexec')
24    if not os.path.isfile(mpiexec):
25      return None
26    return mpiexec
27
28  def example(self, num):
29    '''Return the path to the executable for a given example number'''
30    return os.path.join(self.dir(), self.arch(), 'lib', 'ex'+str(num)+'-obj', 'ex'+str(num))
31
32  def source(self, library, num):
33    '''Return the path to the sources for a given example number'''
34    d = os.path.join(self.dir(), 'src', library.lower(), 'examples', 'tutorials')
35    name = 'ex'+str(num)
36    sources = []
37    for f in os.listdir(d):
38      if f == name+'.c':
39        sources.insert(0, f)
40      elif f.startswith(name) and f.endswith('.cu'):
41        sources.append(f)
42    return map(lambda f: os.path.join(d, f), sources)
43
44class PETScExample(object):
45  def __init__(self, library, num, **defaultOptions):
46    self.petsc   = PETSc()
47    self.library = library
48    self.num     = num
49    self.opts    = defaultOptions
50    return
51
52  @staticmethod
53  def runShellCommand(command, cwd = None, log = True):
54    import subprocess
55
56    Popen = subprocess.Popen
57    PIPE  = subprocess.PIPE
58    if log: print 'Executing: %s\n' % (command,)
59    pipe = Popen(command, cwd=cwd, stdin=None, stdout=PIPE, stderr=PIPE, bufsize=-1, shell=True, universal_newlines=True)
60    (out, err) = pipe.communicate()
61    ret = pipe.returncode
62    return (out, err, ret)
63
64  def optionsToString(self, **opts):
65    '''Convert a dictionary of options to a command line argument string'''
66    a = []
67    for key,value in opts.iteritems():
68      if value is None:
69        a.append('-'+key)
70      else:
71        a.append('-'+key+' '+str(value))
72    return ' '.join(a)
73
74  def run(self, numProcs = 1, log = True, **opts):
75    if self.petsc.mpiexec() is None:
76      cmd = self.petsc.example(self.num)
77    else:
78      cmd = ' '.join([self.petsc.mpiexec(), '-n', str(numProcs), self.petsc.example(self.num)])
79    cmd += ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts)
80    if 'batch' in opts and opts['batch']:
81      del opts['batch']
82      filename = generateBatchScript(self.num, numProcs, 120, ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts))
83      # Submit job
84      out, err, ret = self.runShellCommand('qsub -q gpu '+filename, log = log)
85      if ret:
86        print err
87        print out
88    else:
89      out, err, ret = self.runShellCommand(cmd, log = log)
90      if ret:
91        print err
92        print out
93    return out
94
95def processSummary(moduleName, defaultStage, eventNames, times, events):
96  '''Process the Python log summary into plot data'''
97  m = __import__(moduleName)
98  reload(m)
99  # Total Time
100  times.append(m.Time[0])
101  # Particular events
102  for name in eventNames:
103    if name.find(':') >= 0:
104      stageName, name = name.split(':', 1)
105      stage = getattr(m, stageName)
106    else:
107      stage = getattr(m, defaultStage)
108    if name in stage.event:
109      if not name in events:
110        events[name] = []
111      try:
112        events[name].append((stage.event[name].Time[0], stage.event[name].Flops[0]/(stage.event[name].Time[0] * 1e6)))
113      except ZeroDivisionError:
114        events[name].append((stage.event[name].Time[0], 0))
115  return
116
117def plotTime(library, num, eventNames, sizes, times, events):
118  from pylab import legend, plot, show, title, xlabel, ylabel
119  import numpy as np
120
121  arches = sizes.keys()
122  data   = []
123  for arch in arches:
124    data.append(sizes[arch])
125    data.append(times[arch])
126  plot(*data)
127  title('Performance on '+library+' Example '+str(num))
128  xlabel('Number of Dof')
129  ylabel('Time (s)')
130  legend(arches, 'upper left', shadow = True)
131  show()
132  return
133
134def plotEventTime(library, num, eventNames, sizes, times, events, filename = None):
135  from pylab import close, legend, plot, savefig, show, title, xlabel, ylabel
136  import numpy as np
137
138  close()
139  arches = sizes.keys()
140  bs     = events[arches[0]].keys()[0]
141  data   = []
142  names  = []
143  for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
144    for arch, style in zip(arches, ['-', ':']):
145      if event in events[arch][bs]:
146        names.append(arch+'-'+str(bs)+' '+event)
147        data.append(sizes[arch][bs])
148        data.append(np.array(events[arch][bs][event])[:,0])
149        data.append(color+style)
150      else:
151        print 'Could not find %s in %s-%d events' % (event, arch, bs)
152  print data
153  plot(*data)
154  title('Performance on '+library+' Example '+str(num))
155  xlabel('Number of Dof')
156  ylabel('Time (s)')
157  legend(names, 'upper left', shadow = True)
158  if filename is None:
159    show()
160  else:
161    savefig(filename)
162  return
163
164def plotEventFlop(library, num, eventNames, sizes, times, events, filename = None):
165  from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel
166  import numpy as np
167
168  arches = sizes.keys()
169  bs     = events[arches[0]].keys()[0]
170  data   = []
171  names  = []
172  for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
173    for arch, style in zip(arches, ['-', ':']):
174      if event in events[arch][bs]:
175        names.append(arch+'-'+str(bs)+' '+event)
176        data.append(sizes[arch][bs])
177        data.append(1e-3*np.array(events[arch][bs][event])[:,1])
178        data.append(color+style)
179      else:
180        print 'Could not find %s in %s-%d events' % (event, arch, bs)
181  semilogy(*data)
182  title('Performance on '+library+' Example '+str(num))
183  xlabel('Number of Dof')
184  ylabel('Computation Rate (GF/s)')
185  legend(names, 'upper left', shadow = True)
186  if filename is None:
187    show()
188  else:
189    savefig(filename)
190  return
191
192def plotEventScaling(library, num, eventNames, procs, events, filename = None):
193  from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel
194  import numpy as np
195
196  arches = procs.keys()
197  bs     = events[arches[0]].keys()[0]
198  data   = []
199  names  = []
200  for arch, style in zip(arches, ['-', ':']):
201    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
202      if event in events[arch][bs]:
203        names.append(arch+'-'+str(bs)+' '+event)
204        data.append(procs[arch][bs])
205        data.append(1e-3*np.array(events[arch][bs][event])[:,1])
206        data.append(color+style)
207      else:
208        print 'Could not find %s in %s-%d events' % (event, arch, bs)
209  plot(*data)
210  title('Performance on '+library+' Example '+str(num))
211  xlabel('Number of Processors')
212  ylabel('Computation Rate (GF/s)')
213  legend(names, 'upper left', shadow = True)
214  if filename is None:
215    show()
216  else:
217    savefig(filename)
218  return
219
220def plotSummaryLine(library, num, eventNames, sizes, times, events):
221  from pylab import legend, plot, show, title, xlabel, ylabel
222  import numpy as np
223  showTime       = False
224  showEventTime  = True
225  showEventFlops = True
226  arches         = sizes.keys()
227  # Time
228  if showTime:
229    data = []
230    for arch in arches:
231      data.append(sizes[arch])
232      data.append(times[arch])
233    plot(*data)
234    title('Performance on '+library+' Example '+str(num))
235    xlabel('Number of Dof')
236    ylabel('Time (s)')
237    legend(arches, 'upper left', shadow = True)
238    show()
239  # Common event time
240  #   We could make a stacked plot like Rio uses here
241  if showEventTime:
242    bs    = events[arches[0]].keys()[0]
243    data  = []
244    names = []
245    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
246      for arch, style in zip(arches, ['-', ':']):
247        if event in events[arch][bs]:
248          names.append(arch+'-'+str(bs)+' '+event)
249          data.append(sizes[arch][bs])
250          data.append(np.array(events[arch][bs][event])[:,0])
251          data.append(color+style)
252        else:
253          print 'Could not find %s in %s-%d events' % (event, arch, bs)
254    print data
255    plot(*data)
256    title('Performance on '+library+' Example '+str(num))
257    xlabel('Number of Dof')
258    ylabel('Time (s)')
259    legend(names, 'upper left', shadow = True)
260    show()
261  # Common event flops
262  #   We could make a stacked plot like Rio uses here
263  if showEventFlops:
264    bs    = events[arches[0]].keys()[0]
265    data  = []
266    names = []
267    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
268      for arch, style in zip(arches, ['-', ':']):
269        if event in events[arch][bs]:
270          names.append(arch+'-'+str(bs)+' '+event)
271          data.append(sizes[arch][bs])
272          data.append(np.array(events[arch][bs][event])[:,1])
273          data.append(color+style)
274        else:
275          print 'Could not find %s in %s-%d events' % (event, arch, bs)
276    plot(*data)
277    title('Performance on '+library+' Example '+str(num))
278    xlabel('Number of Dof')
279    ylabel('Computation Rate (MF/s)')
280    legend(names, 'upper left', shadow = True)
281    show()
282  return
283
284def plotSummaryBar(library, num, eventNames, sizes, times, events):
285  import numpy as np
286  import matplotlib.pyplot as plt
287
288  eventColors = ['b', 'g', 'r', 'y']
289  arches = sizes.keys()
290  names  = []
291  N      = len(sizes[arches[0]])
292  width  = 0.2
293  ind    = np.arange(N) - 0.25
294  bars   = {}
295  for arch in arches:
296    bars[arch] = []
297    bottom = np.zeros(N)
298    for event, color in zip(eventNames, eventColors):
299      names.append(arch+' '+event)
300      times = np.array(events[arch][event])[:,0]
301      bars[arch].append(plt.bar(ind, times, width, color=color, bottom=bottom))
302      bottom += times
303    ind += 0.3
304
305  plt.xlabel('Number of Dof')
306  plt.ylabel('Time (s)')
307  plt.title('GPU vs. CPU Performance on '+library+' Example '+str(num))
308  plt.xticks(np.arange(N), map(str, sizes[arches[0]]))
309  #plt.yticks(np.arange(0,81,10))
310  #plt.legend( (p1[0], p2[0]), ('Men', 'Women') )
311  plt.legend([bar[0] for bar in bars[arches[0]]], eventNames, 'upper right', shadow = True)
312
313  plt.show()
314  return
315
316def getDMComplexSize(dim, out):
317  '''Retrieves the number of cells from '''
318  size = 0
319  for line in out.split('\n'):
320    if line.strip().startswith(str(dim)+'-cells: '):
321      size = int(line.strip()[9:])
322      break
323  return size
324
325def run_DMDA(ex, name, opts, args, sizes, times, events, log=True):
326  for n in map(int, args.size):
327    ex.run(log=log, da_grid_x=n, da_grid_y=n, **opts)
328    sizes[name].append(n*n * args.comp)
329    processSummary('summary', args.stage, args.events, times[name], events[name])
330  return
331
332def run_DMComplex(ex, name, opts, args, sizes, times, events, log=True):
333  # This should eventually be replaced by a direct FFC/Ignition interface
334  if args.operator == 'laplacian':
335    numComp  = 1
336  elif args.operator == 'elasticity':
337    numComp  = args.dim
338  else:
339    raise RuntimeError('Unknown operator: %s' % args.operator)
340
341  for numBlock in [2**i for i in map(int, args.blockExp)]:
342    opts['gpu_blocks'] = numBlock
343    # Generate new block size
344    cmd = './bin/pythonscripts/PetscGenerateFEMQuadrature.py %d %d %d %d %s %s.h' % (args.dim, args.order, numComp, numBlock, args.operator, os.path.splitext(source[0])[0])
345    print(cmd)
346    ret = os.system('python '+cmd)
347    args.files = ['['+','.join(source)+']']
348    buildExample(args)
349    sizes[name][numBlock]  = []
350    times[name][numBlock]  = []
351    events[name][numBlock] = {}
352    for r in map(float, args.refine):
353      out = ex.run(log=log, refinement_limit=r, **opts)
354      sizes[name][numBlock].append(getDMComplexSize(args.dim, out))
355      processSummary('summary', args.stage, args.events, times[name][numBlock], events[name][numBlock])
356  return
357
358def outputData(sizes, times, events, name = 'output.py'):
359  if os.path.exists(name):
360    base, ext = os.path.splitext(name)
361    num = 1
362    while os.path.exists(base+str(num)+ext):
363      num += 1
364    name = base+str(num)+ext
365  with file(name, 'w') as f:
366    f.write('#PETSC_ARCH='+os.environ['PETSC_ARCH']+' '+' '.join(sys.argv)+'\n')
367    f.write('sizes  = '+repr(sizes)+'\n')
368    f.write('times  = '+repr(times)+'\n')
369    f.write('events = '+repr(events)+'\n')
370  return
371
372if __name__ == '__main__':
373  import argparse
374
375  parser = argparse.ArgumentParser(description     = 'PETSc Benchmarking',
376                                   epilog          = 'This script runs src/<library>/examples/tutorials/ex<num>, For more information, visit http://www.mcs.anl.gov/petsc',
377                                   formatter_class = argparse.ArgumentDefaultsHelpFormatter)
378  parser.add_argument('--library', default='SNES',                     help='The PETSc library used in this example')
379  parser.add_argument('--num',     type = int, default='5',            help='The example number')
380  parser.add_argument('--module',  default='summary',                  help='The module for timing output')
381  parser.add_argument('--stage',   default='Main_Stage',               help='The default logging stage')
382  parser.add_argument('--events',  nargs='+',                          help='Events to process')
383  parser.add_argument('--batch',   action='store_true', default=False, help='Generate batch files for the runs instead')
384  parser.add_argument('--daemon',  action='store_true', default=False, help='Run as a daemon')
385  subparsers = parser.add_subparsers(help='DM types')
386
387  parser_dmda = subparsers.add_parser('DMDA', help='Use a DMDA for the problem geometry')
388  parser_dmda.add_argument('--size', nargs='+',  default=['10'], help='Grid size (implementation dependent)')
389  parser_dmda.add_argument('--comp', type = int, default='1',    help='Number of field components')
390  parser_dmda.add_argument('runs',   nargs='*',                  help='Run descriptions: <name>=<args>')
391
392  parser_dmmesh = subparsers.add_parser('DMComplex', help='Use a DMComplex for the problem geometry')
393  parser_dmmesh.add_argument('--dim',      type = int, default='2',        help='Spatial dimension')
394  parser_dmmesh.add_argument('--refine',   nargs='+',  default=['0.0'],    help='List of refinement limits')
395  parser_dmmesh.add_argument('--order',    type = int, default='1',        help='Order of the finite element')
396  parser_dmmesh.add_argument('--operator', default='laplacian',            help='The operator name')
397  parser_dmmesh.add_argument('--blockExp', nargs='+', default=range(0, 5), help='List of block exponents j, block size is 2^j')
398  parser_dmmesh.add_argument('runs',       nargs='*',                      help='Run descriptions: <name>=<args>')
399
400  args = parser.parse_args()
401  print(args)
402  if hasattr(args, 'comp'):
403    args.dmType = 'DMDA'
404  else:
405    args.dmType = 'DMComplex'
406
407  ex     = PETScExample(args.library, args.num, log_summary='summary.dat', log_summary_python = None if args.batch else args.module+'.py', preload='off')
408  source = ex.petsc.source(args.library, args.num)
409  sizes  = {}
410  times  = {}
411  events = {}
412  log    = not args.daemon
413
414  if args.daemon:
415    import daemon
416    print 'Starting daemon'
417    daemon.createDaemon('.')
418
419  for run in args.runs:
420    name, stropts = run.split('=', 1)
421    opts = dict([t if len(t) == 2 else (t[0], None) for t in [arg.split('=', 1) for arg in stropts.split(' ')]])
422    if args.dmType == 'DMDA':
423      sizes[name]  = []
424      times[name]  = []
425      events[name] = {}
426      run_DMDA(ex, name, opts, args, sizes, times, events, log=log)
427    elif args.dmType == 'DMComplex':
428      sizes[name]  = {}
429      times[name]  = {}
430      events[name] = {}
431      run_DMComplex(ex, name, opts, args, sizes, times, events, log=log)
432  outputData(sizes, times, events)
433  if not args.batch and log: plotSummaryLine(args.library, args.num, args.events, sizes, times, events)
434# Benchmark for ex50
435# ./src/benchmarks/benchmarkExample.py --events VecMDot VecMAXPY KSPGMRESOrthog MatMult VecCUSPCopyTo VecCUSPCopyFrom MatCUSPCopyTo --num 50 DMDA --size 10 20 50 100 --comp 4 CPU='pc_type=none mat_no_inode dm_vec_type=seq dm_mat_type=seqaij' GPU='pc_type=none mat_no_inode dm_vec_type=seqcusp dm_mat_type=seqaijcusp cusp_synchronize'
436# Benchmark for ex52
437# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dm_view show_residual=0 compute_function batch' GPU='dm_view show_residual=0 compute_function batch gpu gpu_batches=8'
438# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 --operator=elasticity CPU='dm_view op_type=elasticity show_residual=0 compute_function batch' GPU='dm_view op_type=elasticity show_residual=0 compute_function batch gpu gpu_batches=8'
439# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --dim=3 --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dim=3 dm_view show_residual=0 compute_function batch' GPU='dim=3 dm_view show_residual=0 compute_function batch gpu gpu_batches=8'
440