xref: /petsc/src/benchmarks/benchmarkExample.py (revision feff33ee0b5b037fa8f9f294dede656a2f85cc47)
1#!/usr/bin/env python
2from __future__ import print_function
3import os,sys
4sys.path.append(os.path.join(os.environ['PETSC_DIR'], 'config'))
5sys.path.append(os.getcwd())
6from builder2 import buildExample
7from benchmarkBatch import generateBatchScript
8
9class PETSc(object):
10  def __init__(self):
11    return
12
13  def dir(self):
14    '''Return the root directory for the PETSc tree (usually $PETSC_DIR)'''
15    # This should search for a valid PETSc
16    return os.environ['PETSC_DIR']
17
18  def arch(self):
19    '''Return the PETSc build label (usually $PETSC_ARCH)'''
20    # This should be configurable
21    return os.environ['PETSC_ARCH']
22
23  def mpiexec(self):
24    '''Return the path for the mpi launch executable'''
25    mpiexec = os.path.join(self.dir(), self.arch(), 'bin', 'mpiexec')
26    if not os.path.isfile(mpiexec):
27      return None
28    return mpiexec
29
30  def example(self, library, num):
31    '''Return the path to the executable for a given example number'''
32    return os.path.join(self.dir(), self.arch(), 'lib', library.lower(), 'ex'+str(num))
33
34  def source(self, library, num, filenametail):
35    '''Return the path to the sources for a given example number'''
36    d = os.path.join(self.dir(), 'src', library.lower(), 'examples', 'tutorials')
37    name = 'ex'+str(num)
38    sources = []
39    for f in os.listdir(d):
40      if f == name+'.c':
41        sources.insert(0, f)
42      elif f.startswith(name) and f.endswith(filenametail):
43        sources.append(f)
44    return map(lambda f: os.path.join(d, f), sources)
45
46class PETScExample(object):
47  def __init__(self, library, num, **defaultOptions):
48    self.petsc   = PETSc()
49    self.library = library
50    self.num     = num
51    self.opts    = defaultOptions
52    return
53
54  @staticmethod
55  def runShellCommand(command, cwd = None, log = True):
56    import subprocess
57
58    Popen = subprocess.Popen
59    PIPE  = subprocess.PIPE
60    if log: print('Executing: %s\n' % (command,))
61    pipe = Popen(command, cwd=cwd, stdin=None, stdout=PIPE, stderr=PIPE, bufsize=-1, shell=True, universal_newlines=True)
62    (out, err) = pipe.communicate()
63    ret = pipe.returncode
64    return (out, err, ret)
65
66  def optionsToString(self, **opts):
67    '''Convert a dictionary of options to a command line argument string'''
68    a = []
69    for key,value in opts.iteritems():
70      if value is None:
71        a.append('-'+key)
72      else:
73        a.append('-'+key+' '+str(value))
74    return ' '.join(a)
75
76  def build(self, log = True):
77    sdir = os.path.join(self.petsc.dir(), 'src', self.library.lower(), 'examples', 'tutorials')
78    odir = os.getcwd()
79    os.chdir(sdir)
80    cmd = 'make ex'+str(self.num)
81    out, err, ret = self.runShellCommand(cmd, cwd = sdir, log = log)
82    if err:
83      raise RuntimeError('Unable to build example:\n'+err+out)
84    os.chdir(odir)
85    bdir = os.path.dirname(self.petsc.example(self.library, self.num))
86    try:
87      os.makedirs(bdir)
88    except OSError:
89      if not os.path.isdir(bdir):
90        raise
91    cmd = 'mv '+os.path.join(sdir, 'ex'+str(self.num))+' '+self.petsc.example(self.library, self.num)
92    out, err, ret = self.runShellCommand(cmd, log = log)
93    if ret:
94      print(err)
95      print(out)
96    return
97
98  def run(self, numProcs = 1, log = True, **opts):
99    cmd = ''
100    if self.petsc.mpiexec() is not None:
101      cmd += self.petsc.mpiexec() + ' '
102      numProcs = os.environ.get('NUM_RANKS', numProcs)
103      cmd += ' -n ' + str(numProcs) + ' '
104      if 'PE_HOSTFILE' in os.environ:
105        cmd += ' -hostfile hostfile '
106    cmd += ' '.join([self.petsc.example(self.library, self.num), self.optionsToString(**self.opts), self.optionsToString(**opts)])
107    if 'batch' in opts and opts['batch']:
108      del opts['batch']
109      filename = generateBatchScript(self.num, numProcs, 120, ' '+self.optionsToString(**self.opts)+' '+self.optionsToString(**opts))
110      # Submit job
111      out, err, ret = self.runShellCommand('qsub -q gpu '+filename, log = log)
112      if ret:
113        print(err)
114        print(out)
115    else:
116      out, err, ret = self.runShellCommand(cmd, log = log)
117      if ret:
118        print(err)
119        print(out)
120    return out
121
122def processSummary(moduleName, defaultStage, eventNames, times, events):
123  '''Process the Python log summary into plot data'''
124  m = __import__(moduleName)
125  reload(m)
126  # Total Time
127  times.append(m.Time[0])
128  # Particular events
129  for name in eventNames:
130    if name.find(':') >= 0:
131      stageName, name = name.split(':', 1)
132      stage = getattr(m, stageName)
133    else:
134      stage = getattr(m, defaultStage)
135    if name in stage.event:
136      if not name in events:
137        events[name] = []
138      try:
139        events[name].append((max(stage.event[name].Time), sum(stage.event[name].Flops)/(max(stage.event[name].Time) * 1e6)))
140      except ZeroDivisionError:
141        events[name].append((max(stage.event[name].Time), 0))
142  return
143
144def plotTime(library, num, eventNames, sizes, times, events):
145  from pylab import legend, plot, show, title, xlabel, ylabel
146  import numpy as np
147
148  arches = sizes.keys()
149  data   = []
150  for arch in arches:
151    data.append(sizes[arch])
152    data.append(times[arch])
153  plot(*data)
154  title('Performance on '+library+' Example '+str(num))
155  xlabel('Number of Dof')
156  ylabel('Time (s)')
157  legend(arches, 'upper left', shadow = True)
158  show()
159  return
160
161def plotEventTime(library, num, eventNames, sizes, times, events, filename = None):
162  from pylab import close, legend, plot, savefig, show, title, xlabel, ylabel
163  import numpy as np
164
165  close()
166  arches = sizes.keys()
167  bs     = events[arches[0]].keys()[0]
168  data   = []
169  names  = []
170  for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
171    for arch, style in zip(arches, ['-', ':']):
172      if event in events[arch][bs]:
173        names.append(arch+'-'+str(bs)+' '+event)
174        data.append(sizes[arch][bs])
175        data.append(np.array(events[arch][bs][event])[:,0])
176        data.append(color+style)
177      else:
178        print('Could not find %s in %s-%d events' % (event, arch, bs))
179  print(data)
180  plot(*data)
181  title('Performance on '+library+' Example '+str(num))
182  xlabel('Number of Dof')
183  ylabel('Time (s)')
184  legend(names, 'upper left', shadow = True)
185  if filename is None:
186    show()
187  else:
188    savefig(filename)
189  return
190
191def plotEventFlop(library, num, eventNames, sizes, times, events, filename = None):
192  from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel
193  import numpy as np
194
195  arches = sizes.keys()
196  bs     = events[arches[0]].keys()[0]
197  data   = []
198  names  = []
199  for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
200    for arch, style in zip(arches, ['-', ':']):
201      if event in events[arch][bs]:
202        names.append(arch+'-'+str(bs)+' '+event)
203        data.append(sizes[arch][bs])
204        data.append(1e-3*np.array(events[arch][bs][event])[:,1])
205        data.append(color+style)
206      else:
207        print('Could not find %s in %s-%d events' % (event, arch, bs))
208  semilogy(*data)
209  title('Performance on '+library+' Example '+str(num))
210  xlabel('Number of Dof')
211  ylabel('Computation Rate (GF/s)')
212  legend(names, 'upper left', shadow = True)
213  if filename is None:
214    show()
215  else:
216    savefig(filename)
217  return
218
219def plotEventScaling(library, num, eventNames, procs, events, filename = None):
220  from pylab import legend, plot, savefig, semilogy, show, title, xlabel, ylabel
221  import numpy as np
222
223  arches = procs.keys()
224  bs     = events[arches[0]].keys()[0]
225  data   = []
226  names  = []
227  for arch, style in zip(arches, ['-', ':']):
228    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
229      if event in events[arch][bs]:
230        names.append(arch+'-'+str(bs)+' '+event)
231        data.append(procs[arch][bs])
232        data.append(1e-3*np.array(events[arch][bs][event])[:,1])
233        data.append(color+style)
234      else:
235        print('Could not find %s in %s-%d events' % (event, arch, bs))
236  plot(*data)
237  title('Performance on '+library+' Example '+str(num))
238  xlabel('Number of Processors')
239  ylabel('Computation Rate (GF/s)')
240  legend(names, 'upper left', shadow = True)
241  if filename is None:
242    show()
243  else:
244    savefig(filename)
245  return
246
247def plotSummaryLine(library, num, eventNames, sizes, times, events):
248  from pylab import legend, plot, show, title, xlabel, ylabel
249  import numpy as np
250  showTime       = False
251  showEventTime  = True
252  showEventFlops = True
253  arches         = sizes.keys()
254  # Time
255  if showTime:
256    data = []
257    for arch in arches:
258      data.append(sizes[arch])
259      data.append(times[arch])
260    plot(*data)
261    title('Performance on '+library+' Example '+str(num))
262    xlabel('Number of Dof')
263    ylabel('Time (s)')
264    legend(arches, 'upper left', shadow = True)
265    show()
266  # Common event time
267  #   We could make a stacked plot like Rio uses here
268  if showEventTime:
269    bs    = events[arches[0]].keys()[0]
270    data  = []
271    names = []
272    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
273      for arch, style in zip(arches, ['-', ':']):
274        if event in events[arch][bs]:
275          names.append(arch+'-'+str(bs)+' '+event)
276          data.append(sizes[arch][bs])
277          data.append(np.array(events[arch][bs][event])[:,0])
278          data.append(color+style)
279        else:
280          print('Could not find %s in %s-%d events' % (event, arch, bs))
281    print(data)
282    plot(*data)
283    title('Performance on '+library+' Example '+str(num))
284    xlabel('Number of Dof')
285    ylabel('Time (s)')
286    legend(names, 'upper left', shadow = True)
287    show()
288  # Common event flops
289  #   We could make a stacked plot like Rio uses here
290  if showEventFlops:
291    bs    = events[arches[0]].keys()[0]
292    data  = []
293    names = []
294    for event, color in zip(eventNames, ['b', 'g', 'r', 'y']):
295      for arch, style in zip(arches, ['-', ':']):
296        if event in events[arch][bs]:
297          names.append(arch+'-'+str(bs)+' '+event)
298          data.append(sizes[arch][bs])
299          data.append(np.array(events[arch][bs][event])[:,1])
300          data.append(color+style)
301        else:
302          print('Could not find %s in %s-%d events' % (event, arch, bs))
303    plot(*data)
304    title('Performance on '+library+' Example '+str(num))
305    xlabel('Number of Dof')
306    ylabel('Computation Rate (MF/s)')
307    legend(names, 'upper left', shadow = True)
308    show()
309  return
310
311def plotSummaryBar(library, num, eventNames, sizes, times, events):
312  import numpy as np
313  import matplotlib.pyplot as plt
314
315  eventColors = ['b', 'g', 'r', 'y']
316  arches = sizes.keys()
317  names  = []
318  N      = len(sizes[arches[0]])
319  width  = 0.2
320  ind    = np.arange(N) - 0.25
321  bars   = {}
322  for arch in arches:
323    bars[arch] = []
324    bottom = np.zeros(N)
325    for event, color in zip(eventNames, eventColors):
326      names.append(arch+' '+event)
327      times = np.array(events[arch][event])[:,0]
328      bars[arch].append(plt.bar(ind, times, width, color=color, bottom=bottom))
329      bottom += times
330    ind += 0.3
331
332  plt.xlabel('Number of Dof')
333  plt.ylabel('Time (s)')
334  plt.title('GPU vs. CPU Performance on '+library+' Example '+str(num))
335  plt.xticks(np.arange(N), map(str, sizes[arches[0]]))
336  #plt.yticks(np.arange(0,81,10))
337  #plt.legend( (p1[0], p2[0]), ('Men', 'Women') )
338  plt.legend([bar[0] for bar in bars[arches[0]]], eventNames, 'upper right', shadow = True)
339
340  plt.show()
341  return
342
343def getDMComplexSize(dim, out):
344  '''Retrieves the number of cells from -dm_view output'''
345  size = 0
346  for line in out.split('\n'):
347    if line.strip().startswith(str(dim)+'-cells: '):
348      sizes = line.strip()[9:].split()
349      size  = sum(map(int, sizes))
350      break
351  return size
352
353def run_DMDA(ex, name, opts, args, sizes, times, events, log=True):
354  for n in map(int, args.size):
355    ex.run(log=log, da_grid_x=n, da_grid_y=n, **opts)
356    sizes[name].append(n*n * args.comp)
357    processSummary('summary', args.stage, args.events, times[name], events[name])
358  return
359
360def run_DMComplex(ex, name, opts, args, sizes, times, events, log=True):
361  # This should eventually be replaced by a direct FFC/Ignition interface
362  if args.operator == 'laplacian':
363    numComp  = 1
364  elif args.operator == 'elasticity':
365    numComp  = args.dim
366  else:
367    raise RuntimeError('Unknown operator: %s' % args.operator)
368
369  for numBlock in [2**i for i in map(int, args.blockExp)]:
370    opts['gpu_blocks'] = numBlock
371    args.files = ['['+','.join(source)+']']
372    buildExample(args)
373    sizes[name][numBlock]  = []
374    times[name][numBlock]  = []
375    events[name][numBlock] = {}
376    for r in map(float, args.refine):
377      out = ex.run(log=log, refinement_limit=r, **opts)
378      sizes[name][numBlock].append(getDMComplexSize(args.dim, out))
379      processSummary('summary', args.stage, args.events, times[name][numBlock], events[name][numBlock])
380  return
381
382def outputData(sizes, times, events, name = 'output.py'):
383  if os.path.exists(name):
384    base, ext = os.path.splitext(name)
385    num = 1
386    while os.path.exists(base+str(num)+ext):
387      num += 1
388    name = base+str(num)+ext
389  with file(name, 'w') as f:
390    f.write('#PETSC_ARCH='+os.environ['PETSC_ARCH']+' '+' '.join(sys.argv)+'\n')
391    f.write('sizes  = '+repr(sizes)+'\n')
392    f.write('times  = '+repr(times)+'\n')
393    f.write('events = '+repr(events)+'\n')
394  return
395
396if __name__ == '__main__':
397  import argparse
398
399  parser = argparse.ArgumentParser(description     = 'PETSc Benchmarking',
400                                   epilog          = 'This script runs src/<library>/examples/tutorials/ex<num>, For more information, visit http://www.mcs.anl.gov/petsc',
401                                   formatter_class = argparse.ArgumentDefaultsHelpFormatter)
402  parser.add_argument('--library', default='SNES',                     help='The PETSc library used in this example')
403  parser.add_argument('--num',     type = int, default='5',            help='The example number')
404  parser.add_argument('--module',  default='summary',                  help='The module for timing output')
405  parser.add_argument('--stage',   default='Main_Stage',               help='The default logging stage')
406  parser.add_argument('--events',  nargs='+',                          help='Events to process')
407  parser.add_argument('--batch',   action='store_true', default=False, help='Generate batch files for the runs instead')
408  parser.add_argument('--daemon',  action='store_true', default=False, help='Run as a daemon')
409  parser.add_argument('--gpulang', default='OpenCL',                   help='GPU Language to use: Either CUDA or OpenCL (default)')
410  subparsers = parser.add_subparsers(help='DM types')
411
412  parser_dmda = subparsers.add_parser('DMDA', help='Use a DMDA for the problem geometry')
413  parser_dmda.add_argument('--size', nargs='+',  default=['10'], help='Grid size (implementation dependent)')
414  parser_dmda.add_argument('--comp', type = int, default='1',    help='Number of field components')
415  parser_dmda.add_argument('runs',   nargs='*',                  help='Run descriptions: <name>=<args>')
416
417  parser_dmmesh = subparsers.add_parser('DMComplex', help='Use a DMComplex for the problem geometry')
418  parser_dmmesh.add_argument('--dim',      type = int, default='2',        help='Spatial dimension')
419  parser_dmmesh.add_argument('--refine',   nargs='+',  default=['0.0'],    help='List of refinement limits')
420  parser_dmmesh.add_argument('--order',    type = int, default='1',        help='Order of the finite element')
421  parser_dmmesh.add_argument('--operator', default='laplacian',            help='The operator name')
422  parser_dmmesh.add_argument('--blockExp', nargs='+', default=range(0, 5), help='List of block exponents j, block size is 2^j')
423  parser_dmmesh.add_argument('runs',       nargs='*',                      help='Run descriptions: <name>=<args>')
424
425  args = parser.parse_args()
426  print(args)
427  if hasattr(args, 'comp'):
428    args.dmType = 'DMDA'
429  else:
430    args.dmType = 'DMComplex'
431
432  ex     = PETScExample(args.library, args.num, log_summary='summary.dat', log_summary_python = None if args.batch else args.module+'.py', preload='off')
433  if args.gpulang == 'CUDA':
434    source = ex.petsc.source(args.library, args.num, '.cu')
435  else:
436    source = ex.petsc.source(args.library, args.num, 'OpenCL.c')  # Using the convention of OpenCL code residing in source files ending in 'OpenCL.c' (at least for snes/ex52)
437  sizes  = {}
438  times  = {}
439  events = {}
440  log    = not args.daemon
441
442  if args.daemon:
443    import daemon
444    print('Starting daemon')
445    daemon.createDaemon('.')
446
447  for run in args.runs:
448    name, stropts = run.split('=', 1)
449    opts = dict([t if len(t) == 2 else (t[0], None) for t in [arg.split('=', 1) for arg in stropts.split(' ')]])
450    if args.dmType == 'DMDA':
451      sizes[name]  = []
452      times[name]  = []
453      events[name] = {}
454      run_DMDA(ex, name, opts, args, sizes, times, events, log=log)
455    elif args.dmType == 'DMComplex':
456      sizes[name]  = {}
457      times[name]  = {}
458      events[name] = {}
459      run_DMComplex(ex, name, opts, args, sizes, times, events, log=log)
460  outputData(sizes, times, events)
461  if not args.batch and log: plotSummaryLine(args.library, args.num, args.events, sizes, times, events)
462# Benchmark for ex50
463# ./src/benchmarks/benchmarkExample.py --events VecMDot VecMAXPY KSPGMRESOrthog MatMult VecCUSPCopyTo VecCUSPCopyFrom MatCUSPCopyTo --num 50 DMDA --size 10 20 50 100 --comp 4 CPU='pc_type=none mat_no_inode dm_vec_type=seq dm_mat_type=seqaij' GPU='pc_type=none mat_no_inode dm_vec_type=seqcusp dm_mat_type=seqaijcusp cusp_synchronize'
464# Benchmark for ex52
465# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dm_view show_residual=0 compute_function batch' GPU='dm_view show_residual=0 compute_function batch gpu gpu_batches=8'
466# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 --operator=elasticity CPU='dm_view op_type=elasticity show_residual=0 compute_function batch' GPU='dm_view op_type=elasticity show_residual=0 compute_function batch gpu gpu_batches=8'
467# ./src/benchmarks/benchmarkExample.py --events IntegBatchCPU IntegBatchGPU IntegGPUOnly --num 52 DMComplex --dim=3 --refine 0.0625 0.00625 0.000625 0.0000625 --blockExp 4 --order=1 CPU='dim=3 dm_view show_residual=0 compute_function batch' GPU='dim=3 dm_view show_residual=0 compute_function batch gpu gpu_batches=8'
468