xref: /petsc/src/benchmarks/benchmarkExample.py (revision 00d931fe9835bef04c3bcd2a9a1bf118d64cc4c2)
1#!/usr/bin/env python
2import os,sys
3sys.path.append(os.path.join(os.environ['PETSC_DIR'], 'config'))
4sys.path.append(os.getcwd())
5from builder2 import buildExample
6from benchmarkBatch import generateBatchScript
7
class PETSc(object):
  '''Locate pieces of a PETSc installation from the $PETSC_DIR and $PETSC_ARCH environment variables'''
  def __init__(self):
    return

  def dir(self):
    '''Return the root directory for the PETSc tree (usually $PETSC_DIR)

    Raises KeyError if PETSC_DIR is not set in the environment.'''
    # This should search for a valid PETSc
    return os.environ['PETSC_DIR']

  def arch(self):
    '''Return the PETSc build label (usually $PETSC_ARCH)

    Raises KeyError if PETSC_ARCH is not set in the environment.'''
    # This should be configurable
    return os.environ['PETSC_ARCH']

  def mpiexec(self):
    '''Return the path for the mpi launch executable, or None if <dir>/<arch>/bin/mpiexec is not a file'''
    mpiexec = os.path.join(self.dir(), self.arch(), 'bin', 'mpiexec')
    if not os.path.isfile(mpiexec):
      return None
    return mpiexec

  def example(self, num):
    '''Return the path to the executable for a given example number (<dir>/<arch>/lib/ex<num>-obj/ex<num>)'''
    name = 'ex'+str(num)
    return os.path.join(self.dir(), self.arch(), 'lib', name+'-obj', name)

  def source(self, library, num, filenametail):
    '''Return the list of paths to the sources for a given example number

    The tutorials directory of the library is scanned. The main file ex<num>.c is
    always placed first; it is followed, in sorted order, by every file whose name
    starts with ex<num> and ends with filenametail (e.g. '.cu' or 'OpenCL.c').'''
    d = os.path.join(self.dir(), 'src', library.lower(), 'examples', 'tutorials')
    name = 'ex'+str(num)
    sources = []
    # Sort so that the result does not depend on the order the filesystem reports entries
    for f in sorted(os.listdir(d)):
      if f == name+'.c':
        sources.insert(0, f)
      elif f.startswith(name) and f.endswith(filenametail):
        # 'ex5' is also a prefix of 'ex52_foo.cu': a digit right after the name means a different example
        if not f[len(name):len(name)+1].isdigit():
          sources.append(f)
    # A real list (not a one-shot iterator) since callers join it more than once
    return [os.path.join(d, f) for f in sources]
44
class PETScExample(object):
  '''A PETSc tutorial example together with its default options; runs it directly or through a batch queue'''
  def __init__(self, library, num, **defaultOptions):
    '''Remember the library name, the example number and the options passed on every run'''
    self.petsc   = PETSc()
    self.library = library
    self.num     = num
    self.opts    = defaultOptions
    return

  @staticmethod
  def runShellCommand(command, cwd = None, log = True):
    '''Run command through the shell and return the tuple (stdout, stderr, return code)

    - command is passed to the shell verbatim (shell=True), so it must already be properly quoted
    - cwd is the working directory for the child, or None for the current one
    - log, if true, echoes the command before executing it'''
    import subprocess

    if log: print('Executing: %s\n' % (command,))
    pipe = subprocess.Popen(command, cwd=cwd, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            bufsize=-1, shell=True, universal_newlines=True)
    (out, err) = pipe.communicate()
    return (out, err, pipe.returncode)

  def optionsToString(self, **opts):
    '''Convert a dictionary of options to a command line argument string

    A value of None produces a bare flag "-key", anything else produces "-key value".'''
    a = []
    for key, value in opts.items():
      if value is None:
        a.append('-'+key)
      else:
        a.append('-'+key+' '+str(value))
    return ' '.join(a)

  def run(self, numProcs = 1, log = True, **opts):
    '''Run the example with the default options plus opts and return its standard output

    - numProcs is the number of MPI ranks; when an mpiexec is found, $NUM_RANKS overrides it
    - log is forwarded to runShellCommand()
    - a true 'batch' entry in opts submits a generated batch script with qsub instead of running directly
    On a nonzero return code the captured stderr and stdout are printed.'''
    cmd = ''
    # Probe for the launcher once; without it the example is started directly
    mpiexec = self.petsc.mpiexec()
    if mpiexec is not None:
      cmd += mpiexec + ' '
      numProcs = os.environ.get('NUM_RANKS', numProcs)
      cmd += ' -n ' + str(numProcs) + ' '
      if 'PE_HOSTFILE' in os.environ:
        cmd += ' -hostfile hostfile '
    cmd += ' '.join([self.petsc.example(self.num), self.optionsToString(**self.opts), self.optionsToString(**opts)])
    if 'batch' in opts and opts['batch']:
      # 'batch' only selects the submission path, it is not an option of the example itself
      del opts['batch']
      filename = generateBatchScript(self.num, numProcs, 120, ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts))
      # Submit job
      out, err, ret = self.runShellCommand('qsub -q gpu '+filename, log = log)
    else:
      out, err, ret = self.runShellCommand(cmd, log = log)
    if ret:
      print(err)
      print(out)
    return out
98
def processSummary(moduleName, defaultStage, eventNames, times, events):
  '''Process the Python log summary into plot data

  - moduleName is the importable module holding the log summary; it is reloaded on
    every call so that a file rewritten by a new run is read again
  - defaultStage is the stage attribute used for event names without a "Stage:" prefix
  - eventNames are the events to extract, each either "Event" or "Stage:Event"
  - times is a list to which the total time (m.Time[0]) is appended
  - events maps each event name to a list, to which the tuple
    (max time, sum of flops/(max time * 1e6)) is appended; the rate is 0 when the max time is zero
  Events which are missing from their stage are skipped silently.'''
  try:
    reloadModule = reload
  except NameError:
    # Python 3 has no builtin reload()
    from importlib import reload as reloadModule
  m = __import__(moduleName)
  reloadModule(m)
  # Total Time
  times.append(m.Time[0])
  # Particular events
  for name in eventNames:
    if ':' in name:
      stageName, name = name.split(':', 1)
    else:
      stageName = defaultStage
    stage = getattr(m, stageName)
    if name not in stage.event:
      continue
    maxTime = max(stage.event[name].Time)
    try:
      rate = sum(stage.event[name].Flops)/(maxTime * 1e6)
    except ZeroDivisionError:
      rate = 0
    events.setdefault(name, []).append((maxTime, rate))
  return
120
def plotTime(library, num, eventNames, sizes, times, events):
  '''Plot the total run time against the problem size, one curve per run name, and show the figure

  - library and num only appear in the title
  - sizes and times map each run name to its x and y values
  - eventNames and events are accepted for a uniform plotting signature but are not used'''
  from pylab import legend, plot, show, title, xlabel, ylabel

  arches = list(sizes.keys())
  data   = []
  for arch in arches:
    data.append(sizes[arch])
    data.append(times[arch])
  plot(*data)
  title('Performance on '+library+' Example '+str(num))
  xlabel('Number of Dof')
  ylabel('Time (s)')
  # loc must be a keyword: two positional arguments are read as (handles, labels) by current matplotlib
  legend(arches, loc = 'upper left', shadow = True)
  show()
  return
137
def plotEventTime(library, num, eventNames, sizes, times, events, filename = None):
  '''Plot the time of each event against the problem size

  - sizes[run][bs] holds the x values and events[run][bs][event] a list of tuples
    whose first entry is plotted as the time
  - bs is the first key found in the events of the first run; only that key is plotted
  - zip() against the color and line style lists limits the plot to the first four
    events and the first two runs
  - the figure is shown when filename is None and saved to filename otherwise
  - times is not used'''
  from pylab import close, legend, plot, savefig, show, title, xlabel, ylabel
  import numpy as np

  close()
  arches = list(sizes.keys())
  bs     = list(events[arches[0]].keys())[0]
  data   = []
  names  = []
  for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
    for arch, style in zip(arches, ['-', ':']):
      if event in events[arch][bs]:
        names.append(arch+'-'+str(bs)+' '+event)
        data.append(sizes[arch][bs])
        data.append(np.array(events[arch][bs][event])[:,0])
        data.append(color+style)
      else:
        print('Could not find %s in %s-%d events' % (event, arch, bs))
  print(data)
  plot(*data)
  title('Performance on '+library+' Example '+str(num))
  xlabel('Number of Dof')
  ylabel('Time (s)')
  # loc must be a keyword: two positional arguments are read as (handles, labels) by current matplotlib
  legend(names, loc = 'upper left', shadow = True)
  if filename is None:
    show()
  else:
    savefig(filename)
  return
167
def plotEventFlop(library, num, eventNames, sizes, times, events, filename = None):
  '''Plot the computation rate of each event against the problem size on a logarithmic y axis

  - sizes[run][bs] holds the x values and events[run][bs][event] a list of tuples
    whose second entry, scaled by 1e-3, is plotted as the rate
  - bs is the first key found in the events of the first run; only that key is plotted
  - zip() against the color and line style lists limits the plot to the first four
    events and the first two runs
  - the figure is shown when filename is None and saved to filename otherwise
  - times is not used'''
  from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel
  import numpy as np

  arches = list(sizes.keys())
  bs     = list(events[arches[0]].keys())[0]
  data   = []
  names  = []
  for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
    for arch, style in zip(arches, ['-', ':']):
      if event in events[arch][bs]:
        names.append(arch+'-'+str(bs)+' '+event)
        data.append(sizes[arch][bs])
        data.append(1e-3*np.array(events[arch][bs][event])[:,1])
        data.append(color+style)
      else:
        print('Could not find %s in %s-%d events' % (event, arch, bs))
  semilogy(*data)
  title('Performance on '+library+' Example '+str(num))
  xlabel('Number of Dof')
  ylabel('Computation Rate (GF/s)')
  # loc must be a keyword: two positional arguments are read as (handles, labels) by current matplotlib
  legend(names, loc = 'upper left', shadow = True)
  if filename is None:
    show()
  else:
    savefig(filename)
  return
195
def plotEventScaling(library, num, eventNames, procs, events, filename = None):
  '''Plot the computation rate of each event against the number of processors

  - procs[run][bs] holds the x values and events[run][bs][event] a list of tuples
    whose second entry, scaled by 1e-3, is plotted as the rate
  - bs is the first key found in the events of the first run; only that key is plotted
  - zip() against the line style and color lists limits the plot to the first two
    runs and the first four events
  - the figure is shown when filename is None and saved to filename otherwise'''
  from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel
  import numpy as np

  arches = list(procs.keys())
  bs     = list(events[arches[0]].keys())[0]
  data   = []
  names  = []
  for arch, style in zip(arches, ['-', ':']):
    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
      if event in events[arch][bs]:
        names.append(arch+'-'+str(bs)+' '+event)
        data.append(procs[arch][bs])
        data.append(1e-3*np.array(events[arch][bs][event])[:,1])
        data.append(color+style)
      else:
        print('Could not find %s in %s-%d events' % (event, arch, bs))
  plot(*data)
  title('Performance on '+library+' Example '+str(num))
  xlabel('Number of Processors')
  ylabel('Computation Rate (GF/s)')
  # loc must be a keyword: two positional arguments are read as (handles, labels) by current matplotlib
  legend(names, loc = 'upper left', shadow = True)
  if filename is None:
    show()
  else:
    savefig(filename)
  return
223
def _summaryLineSeries(eventNames, arches, sizes, events, bs, column):
  '''Return (plot arguments, legend names) for one column of the event tuples stored under key bs'''
  import numpy as np

  data  = []
  names = []
  # zip() limits the series to the first four events and the first two runs
  for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
    for arch, style in zip(arches, ['-', ':']):
      if event in events[arch][bs]:
        names.append(arch+'-'+str(bs)+' '+event)
        data.append(sizes[arch][bs])
        data.append(np.array(events[arch][bs][event])[:,column])
        data.append(color+style)
      else:
        print('Could not find %s in %s-%d events' % (event, arch, bs))
  return data, names

def plotSummaryLine(library, num, eventNames, sizes, times, events):
  '''Show line plots summarizing the runs, one figure after the other

  The three local flags select the figures: total time (off), event time (on) and
  event computation rate (on). The event figures use sizes[run][bs] as x values and
  column 0 (time) or column 1 (rate) of the tuples in events[run][bs][event], where
  bs is the first key found in the events of the first run.'''
  from pylab import legend, plot, show, title, xlabel, ylabel
  showTime       = False
  showEventTime  = True
  showEventFlops = True
  arches         = list(sizes.keys())
  # Time
  if showTime:
    data = []
    for arch in arches:
      data.append(sizes[arch])
      data.append(times[arch])
    plot(*data)
    title('Performance on '+library+' Example '+str(num))
    xlabel('Number of Dof')
    ylabel('Time (s)')
    # loc must be a keyword: two positional arguments are read as (handles, labels) by current matplotlib
    legend(arches, loc = 'upper left', shadow = True)
    show()
  # Common event time
  #   We could make a stacked plot like Rio uses here
  if showEventTime:
    bs = list(events[arches[0]].keys())[0]
    data, names = _summaryLineSeries(eventNames, arches, sizes, events, bs, 0)
    print(data)
    plot(*data)
    title('Performance on '+library+' Example '+str(num))
    xlabel('Number of Dof')
    ylabel('Time (s)')
    legend(names, loc = 'upper left', shadow = True)
    show()
  # Common event flops
  #   We could make a stacked plot like Rio uses here
  if showEventFlops:
    bs = list(events[arches[0]].keys())[0]
    data, names = _summaryLineSeries(eventNames, arches, sizes, events, bs, 1)
    plot(*data)
    title('Performance on '+library+' Example '+str(num))
    xlabel('Number of Dof')
    ylabel('Computation Rate (MF/s)')
    legend(names, loc = 'upper left', shadow = True)
    show()
  return
287
def plotSummaryBar(library, num, eventNames, sizes, times, events):
  '''Show a stacked bar chart of event times, one group of bars per problem size

  - sizes[run] lists the problem sizes; the sizes of the first run label the x axis
  - events[run][event] is a list of tuples whose first entry is stacked as the time
  - zip() against the color list limits the chart to the first four events
  - every event is looked up for every run, so a missing event raises KeyError
  - times is not used'''
  import numpy as np
  import matplotlib.pyplot as plt

  eventColors = ['b', 'g', 'r', 'y']
  arches = list(sizes.keys())
  N      = len(sizes[arches[0]])
  width  = 0.2
  ind    = np.arange(N) - 0.25
  bars   = {}
  for arch in arches:
    bars[arch] = []
    bottom = np.zeros(N)
    for event, color in zip(eventNames, eventColors):
      # Kept under its own name so that the times argument is not shadowed
      eventTimes = np.array(events[arch][event])[:,0]
      bars[arch].append(plt.bar(ind, eventTimes, width, color=color, bottom=bottom))
      bottom += eventTimes
    # Shift the bars of the next run to the right within each group
    ind += 0.3

  plt.xlabel('Number of Dof')
  plt.ylabel('Time (s)')
  plt.title('GPU vs. CPU Performance on '+library+' Example '+str(num))
  plt.xticks(np.arange(N), [str(s) for s in sizes[arches[0]]])
  # The legend shows the event colors, taken from the bars of the first run;
  # loc is passed by keyword since current matplotlib no longer accepts it positionally
  plt.legend([bar[0] for bar in bars[arches[0]]], eventNames, loc = 'upper right', shadow = True)

  plt.show()
  return
319
def getDMComplexSize(dim, out):
  '''Retrieves the number of cells from -dm_view output

  The first line which, stripped of surrounding whitespace, starts with
  "<dim>-cells: " is used; the whitespace separated integers after that prefix
  are summed. Returns 0 if there is no such line.'''
  prefix = str(dim)+'-cells: '
  for line in out.split('\n'):
    line = line.strip()
    if line.startswith(prefix):
      # Slice by the prefix length rather than a fixed offset, so any number of digits in dim works
      return sum(map(int, line[len(prefix):].split()))
  return 0
329
def run_DMDA(ex, name, opts, args, sizes, times, events, log=True):
  '''Run the example once per grid size in args.size and collect the results under name

  Each size n is passed as both da_grid_x and da_grid_y, and n*n*args.comp is
  recorded as the problem size. The log summary module is then processed into
  times[name] and events[name].'''
  # Read back the module the example was told to write (--module), falling back to the historical name
  moduleName = getattr(args, 'module', 'summary')
  for n in map(int, args.size):
    ex.run(log=log, da_grid_x=n, da_grid_y=n, **opts)
    sizes[name].append(n*n * args.comp)
    processSummary(moduleName, args.stage, args.events, times[name], events[name])
  return
336
def run_DMComplex(ex, name, opts, args, sizes, times, events, log=True):
  '''Rebuild and run the example for every block size and refinement limit, collecting the results under name

  For each exponent j in args.blockExp the example is rebuilt with gpu_blocks = 2^j
  (stored into opts, so the caller's dictionary is modified) and run once per
  entry of args.refine. Results are stored in sizes[name][2^j], times[name][2^j]
  and events[name][2^j]. Raises RuntimeError for an unknown args.operator.

  NOTE: this reads the module level variable "source", which is only set when the
  file is executed as a script.'''
  # This should eventually be replaced by a direct FFC/Ignition interface
  if args.operator not in ('laplacian', 'elasticity'):
    raise RuntimeError('Unknown operator: %s' % args.operator)
  # Read back the module the example was told to write (--module), falling back to the historical name
  moduleName = getattr(args, 'module', 'summary')

  for numBlock in [2**i for i in map(int, args.blockExp)]:
    opts['gpu_blocks'] = numBlock
    args.files = ['['+','.join(source)+']']
    buildExample(args)
    sizes[name][numBlock]  = []
    times[name][numBlock]  = []
    events[name][numBlock] = {}
    for r in map(float, args.refine):
      out = ex.run(log=log, refinement_limit=r, **opts)
      sizes[name][numBlock].append(getDMComplexSize(args.dim, out))
      processSummary(moduleName, args.stage, args.events, times[name][numBlock], events[name][numBlock])
  return
358
def outputData(sizes, times, events, name = 'output.py'):
  '''Write sizes, times and events as Python assignments, without overwriting an existing file

  If name already exists, the first unused name of the form <base><k><ext> with
  k = 1, 2, ... is used instead. The first line of the file is a comment recording
  $PETSC_ARCH and the command line. Raises KeyError if PETSC_ARCH is not set.'''
  if os.path.exists(name):
    base, ext = os.path.splitext(name)
    num = 1
    while os.path.exists(base+str(num)+ext):
      num += 1
    name = base+str(num)+ext
  # open() instead of file(), which only exists in Python 2
  with open(name, 'w') as f:
    f.write('#PETSC_ARCH='+os.environ['PETSC_ARCH']+' '+' '.join(sys.argv)+'\n')
    f.write('sizes  = '+repr(sizes)+'\n')
    f.write('times  = '+repr(times)+'\n')
    f.write('events = '+repr(events)+'\n')
  return
372
if __name__ == '__main__':
  # Command line driver: parse the options, run every requested configuration,
  # write the collected data with outputData() and plot a summary
  import argparse

  parser = argparse.ArgumentParser(description     = 'PETSc Benchmarking',
                                   epilog          = 'This script runs src/<library>/examples/tutorials/ex<num>, For more information, visit http://www.mcs.anl.gov/petsc',
                                   formatter_class = argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('--library', default='SNES',                     help='The PETSc library used in this example')
  parser.add_argument('--num',     type = int, default='5',            help='The example number')
  parser.add_argument('--module',  default='summary',                  help='The module for timing output')
  parser.add_argument('--stage',   default='Main_Stage',               help='The default logging stage')
  parser.add_argument('--events',  nargs='+',                          help='Events to process')
  parser.add_argument('--batch',   action='store_true', default=False, help='Generate batch files for the runs instead')
  parser.add_argument('--daemon',  action='store_true', default=False, help='Run as a daemon')
  parser.add_argument('--gpulang', default='OpenCL',                   help='GPU Language to use: Either CUDA or OpenCL (default)')
  # dest records which sub-command was chosen, instead of guessing it from the presence of --comp
  subparsers = parser.add_subparsers(help='DM types', dest='dmType')

  parser_dmda = subparsers.add_parser('DMDA', help='Use a DMDA for the problem geometry')
  parser_dmda.add_argument('--size', nargs='+',  default=['10'], help='Grid size (implementation dependent)')
  parser_dmda.add_argument('--comp', type = int, default='1',    help='Number of field components')
  parser_dmda.add_argument('runs',   nargs='*',                  help='Run descriptions: <name>=<args>')

  parser_dmmesh = subparsers.add_parser('DMComplex', help='Use a DMComplex for the problem geometry')
  parser_dmmesh.add_argument('--dim',      type = int, default='2',        help='Spatial dimension')
  parser_dmmesh.add_argument('--refine',   nargs='+',  default=['0.0'],    help='List of refinement limits')
  parser_dmmesh.add_argument('--order',    type = int, default='1',        help='Order of the finite element')
  parser_dmmesh.add_argument('--operator', default='laplacian',            help='The operator name')
  parser_dmmesh.add_argument('--blockExp', nargs='+', default=list(range(0, 5)), help='List of block exponents j, block size is 2^j')
  parser_dmmesh.add_argument('runs',       nargs='*',                      help='Run descriptions: <name>=<args>')

  args = parser.parse_args()
  print(args)
  if args.dmType is None:
    # Python 3 argparse does not require a sub-command by default
    parser.error('A DM type must be given: DMDA or DMComplex')

  ex     = PETScExample(args.library, args.num, log_summary='summary.dat', log_summary_python = None if args.batch else args.module+'.py', preload='off')
  # "source" has to stay a module level name, run_DMComplex() reads it
  if args.gpulang == 'CUDA':
    source = ex.petsc.source(args.library, args.num, '.cu')
  else:
    source = ex.petsc.source(args.library, args.num, 'OpenCL.c')  # Using the convention of OpenCL code residing in source files ending in 'OpenCL.c' (at least for snes/ex52)
  sizes  = {}
  times  = {}
  events = {}
  log    = not args.daemon

  if args.daemon:
    import daemon
    print('Starting daemon')
    daemon.createDaemon('.')

  for run in args.runs:
    if '=' not in run:
      parser.error('Run descriptions must have the form <name>=<args>: '+run)
    name, stropts = run.split('=', 1)
    # Each word is either "key=value" or a bare flag "key"; split() ignores repeated spaces
    opts = {}
    for arg in stropts.split():
      key, sep, value = arg.partition('=')
      opts[key] = value if sep else None
    if args.dmType == 'DMDA':
      sizes[name]  = []
      times[name]  = []
      events[name] = {}
      run_DMDA(ex, name, opts, args, sizes, times, events, log=log)
    elif args.dmType == 'DMComplex':
      sizes[name]  = {}
      times[name]  = {}
      events[name] = {}
      run_DMComplex(ex, name, opts, args, sizes, times, events, log=log)
  outputData(sizes, times, events)
  if not args.batch and log: plotSummaryLine(args.library, args.num, args.events, sizes, times, events)
439# Benchmark for ex50
440# ./src/benchmarks/benchmarkExample.py --events VecMDot VecMAXPY KSPGMRESOrthog MatMult VecCUSPCopyTo VecCUSPCopyFrom MatCUSPCopyTo --num 50 DMDA --size 10 20 50 100 --comp 4 CPU='pc_type=none mat_no_inode dm_vec_type=seq dm_mat_type=seqaij' GPU='pc_type=none mat_no_inode dm_vec_type=seqcusp dm_mat_type=seqaijcusp cusp_synchronize'
441# Benchmark for ex52
442# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dm_view show_residual=0 compute_function batch' GPU='dm_view show_residual=0 compute_function batch gpu gpu_batches=8'
443# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 --operator=elasticity CPU='dm_view op_type=elasticity show_residual=0 compute_function batch' GPU='dm_view op_type=elasticity show_residual=0 compute_function batch gpu gpu_batches=8'
444# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --dim=3 --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dim=3 dm_view show_residual=0 compute_function batch' GPU='dim=3 dm_view show_residual=0 compute_function batch gpu gpu_batches=8'
445