xref: /petsc/config/BuildSystem/retrieval.py (revision c189a2f2c83bbf756b4cbb984a33c38cce550a3f)
1from __future__ import absolute_import
2import logger
3
4import os
5try:
6  from urllib import urlretrieve
7except ImportError:
8  from urllib.request import urlretrieve
9try:
10  import urlparse as urlparse_local # novermin
11except ImportError:
12  from urllib import parse as urlparse_local # novermin
13import config.base
14import socket
15import shutil
16
# Register the nonstandard schemes used by this module as netloc-carrying so
# that urlparse splits their host part out of the path
urlparse_local.uses_netloc += ['bk', 'ssh', 'svn']
19
20class Retriever(logger.Logger):
21  def __init__(self, sourceControl, clArgs = None, argDB = None):
22    logger.Logger.__init__(self, clArgs, argDB)
23    self.sourceControl = sourceControl
24    self.gitsubmodules = []
25    self.gitprereq = 1
26    self.git_urls = []
27    self.hg_urls = []
28    self.dir_urls = []
29    self.link_urls = []
30    self.tarball_urls = []
31    self.stamp = None
32    return
33
34  def isGitURL(self, url):
35    parsed = urlparse_local.urlparse(url)
36    if (parsed[0] == 'git') or (parsed[0] == 'ssh' and parsed[2].endswith('.git')) or (parsed[0] == 'https' and parsed[2].endswith('.git')):
37      return True
38    elif os.path.isdir(url) and self.isDirectoryGitRepo(url):
39      return True
40    return False
41
42  def setupURLs(self,packagename,urls,gitsubmodules,gitprereq):
43    self.packagename = packagename
44    self.gitsubmodules = gitsubmodules
45    self.gitprereq = gitprereq
46    for url in urls:
47      parsed = urlparse_local.urlparse(url)
48      if self.isGitURL(url):
49        self.git_urls.append(self.removePrefix(url,'git://'))
50      elif parsed[0] == 'hg'or (parsed[0] == 'ssh' and parsed[1].startswith('hg@')):
51        self.hg_urls.append(self.removePrefix(url,'hg://'))
52      elif parsed[0] == 'dir' or os.path.isdir(url):
53        self.dir_urls.append(self.removePrefix(url,'dir://'))
54      elif parsed[0] == 'link':
55        self.link_urls.append(self.removePrefix(url,'link://'))
56      else:
57        # check for ftp.mcs.anl.gov - and use https://,www.mcs.anl.gov,ftp://
58        if url.find('ftp.mcs.anl.gov') != -1:
59          https_url = url.replace('http://','https://').replace('ftp://','http://')
60          self.tarball_urls.extend([https_url,https_url.replace('ftp.mcs.anl.gov/pub/petsc/','www.mcs.anl.gov/petsc/mirror/'),https_url.replace('https://','ftp')])
61        else:
62          self.tarball_urls.extend([url])
63
64  def isDirectoryGitRepo(self, directory):
65    if not hasattr(self.sourceControl, 'git'):
66      self.logPrint('git not found in self.sourceControl - cannot evaluate isDirectoryGitRepo(): '+directory)
67      return False
68    from config.base import Configure
69    for loc in ['.git','']:
70      cmd = '%s rev-parse --resolve-git-dir  %s'  % (self.sourceControl.git, os.path.join(directory,loc))
71      (output, error, ret) = Configure.executeShellCommand(cmd, checkCommand = Configure.passCheckCommand, log = self.log)
72      if not ret:
73        return True
74    return False
75
76  @staticmethod
77  def removeTarget(t):
78    if os.path.islink(t) or os.path.isfile(t):
79      os.unlink(t) # same as os.remove(t)
80    elif os.path.isdir(t):
81      shutil.rmtree(t)
82
83  @staticmethod
84  def getDownloadFailureMessage(package, url, filename=None):
85    slashFilename = '/'+filename if filename else ''
86    return '''\
87Unable to download package %s from: %s
88* If URL specified manually - perhaps there is a typo?
89* If your network is disconnected - please reconnect and rerun ./configure
90* Or perhaps you have a firewall blocking the download
91* You can run with --with-packages-download-dir=/adirectory and ./configure will instruct you what packages to download manually
92* or you can download the above URL manually, to /yourselectedlocation%s
93  and use the configure option:
94  --download-%s=/yourselectedlocation%s
95    ''' % (package.upper(), url, slashFilename, package, slashFilename)
96
97  @staticmethod
98  def removePrefix(url,prefix):
99    '''Replacement for str.removeprefix() supported only since Python 3.9'''
100    if url.startswith(prefix):
101      return url[len(prefix):]
102    return url
103
104  def generateURLs(self):
105    if hasattr(self.sourceControl, 'git') and self.gitprereq:
106      for url in self.git_urls:
107        yield('git',url)
108    else:
109      self.logPrint('Git not found or gitprereq check failed! skipping giturls: '+str(self.git_urls)+'\n')
110    if hasattr(self.sourceControl, 'hg'):
111      for url in self.hg_urls:
112        yield('hg',url)
113    else:
114      self.logPrint('Hg not found - skipping hgurls: '+str(self.hg_urls)+'\n')
115    for url in self.dir_urls:
116      yield('dir',url)
117    for url in self.link_urls:
118      yield('link',url)
119    for url in self.tarball_urls:
120      yield('tarball',url)
121
122  def genericRetrieve(self,proto,url,root):
123    '''Fetch package from version control repository or tarfile indicated by URL and extract it into root'''
124    if proto == 'git':
125      return self.gitRetrieve(url,root)
126    elif proto == 'hg':
127      return self.hgRetrieve(url,root)
128    elif proto == 'dir':
129      return self.dirRetrieve(url,root)
130    elif proto == 'link':
131      self.linkRetrieve(url,root)
132    elif proto == 'tarball':
133      self.tarballRetrieve(url,root)
134
135  def dirRetrieve(self, url, root):
136    self.logPrint('Retrieving %s as directory' % url, 3, 'install')
137    if not os.path.isdir(url): raise RuntimeError('URL %s is not a directory' % url)
138
139    t = os.path.join(root,os.path.basename(url))
140    self.removeTarget(t)
141    shutil.copytree(url,t)
142
143  def linkRetrieve(self, url, root):
144    self.logPrint('Retrieving %s as link' % url, 3, 'install')
145    if not os.path.isdir(url): raise RuntimeError('URL %s is not pointing to a directory' % url)
146
147    t = os.path.join(root,os.path.basename(url))
148    self.removeTarget(t)
149    os.symlink(os.path.abspath(url),t)
150
151  def gitRetrieve(self, url, root):
152    self.logPrint('Retrieving %s as git repo' % url, 3, 'install')
153    if not hasattr(self.sourceControl, 'git'):
154      raise RuntimeError('self.sourceControl.git not set')
155    if os.path.isdir(url) and not self.isDirectoryGitRepo(url):
156      raise RuntimeError('URL %s is a directory but not a git repository' % url)
157
158    newgitrepo = os.path.join(root,'git.'+self.packagename)
159    self.removeTarget(newgitrepo)
160
161    try:
162      submodopt =''
163      for itm in self.gitsubmodules:
164        submodopt += ' --recurse-submodules='+itm
165      config.base.Configure.executeShellCommand('%s clone %s %s %s' % (self.sourceControl.git, submodopt, url, newgitrepo), log = self.log, timeout = 120.0)
166    except  RuntimeError as e:
167      self.logPrint('ERROR: '+str(e))
168      err = str(e)
169      failureMessage = self.getDownloadFailureMessage(self.packagename, url)
170      raise RuntimeError('Unable to clone '+self.packagename+'\n'+err+failureMessage)
171
172  def hgRetrieve(self, url, root):
173    self.logPrint('Retrieving %s as hg repo' % url, 3, 'install')
174    if not hasattr(self.sourceControl, 'hg'):
175      raise RuntimeError('self.sourceControl.hg not set')
176
177    newgitrepo = os.path.join(root,'hg.'+self.packagename)
178    self.removeTarget(newgitrepo)
179    try:
180      config.base.Configure.executeShellCommand('%s clone %s %s' % (self.sourceControl.hg, url, newgitrepo), log = self.log, timeout = 120.0)
181    except  RuntimeError as e:
182      self.logPrint('ERROR: '+str(e))
183      err = str(e)
184      failureMessage = self.getDownloadFailureMessage(self.packagename, url)
185      raise RuntimeError('Unable to clone '+self.packagename+'\n'+err+failureMessage)
186
187  def tarballRetrieve(self, url, root):
188    parsed = urlparse_local.urlparse(url)
189    filename = os.path.basename(parsed[2])
190    localFile = os.path.join(root,'_d_'+filename)
191    self.logPrint('Retrieving %s as tarball to %s' % (url,localFile) , 3, 'install')
192    ext =  os.path.splitext(localFile)[1]
193    if ext not in ['.bz2','.tbz','.gz','.tgz','.zip','.ZIP']:
194      raise RuntimeError('Unknown compression type in URL: '+ url)
195
196    self.removeTarget(localFile)
197
198    if parsed[0] == 'file' and not parsed[1]:
199      url = parsed[2]
200    if os.path.exists(url):
201      if not os.path.isfile(url):
202        raise RuntimeError('Local path exists but is not a regular file: '+ url)
203      # copy local file
204      shutil.copyfile(url, localFile)
205    else:
206      # fetch remote file
207      try:
208        sav_timeout = socket.getdefaulttimeout()
209        socket.setdefaulttimeout(30)
210        urlretrieve(url, localFile)
211        socket.setdefaulttimeout(sav_timeout)
212      except Exception as e:
213        socket.setdefaulttimeout(sav_timeout)
214        failureMessage = self.getDownloadFailureMessage(self.packagename, url, filename)
215        raise RuntimeError(failureMessage)
216
217    self.logPrint('Extracting '+localFile)
218    if ext in ['.zip','.ZIP']:
219      config.base.Configure.executeShellCommand('cd '+root+'; unzip '+localFile, log = self.log)
220      output = config.base.Configure.executeShellCommand('cd '+root+'; zipinfo -1 '+localFile+' | head -n 1', log = self.log)
221      dirname = os.path.normpath(output[0].strip())
222    else:
223      failureMessage = '''\
224Downloaded package %s from: %s is not a tarball.
225[or installed python cannot process compressed files]
226* If you are behind a firewall - please fix your proxy and rerun ./configure
227  For example at LANL you may need to set the environmental variable http_proxy (or HTTP_PROXY?) to  http://proxyout.lanl.gov
228* You can run with --with-packages-download-dir=/adirectory and ./configure will instruct you what packages to download manually
229* or you can download the above URL manually, to /yourselectedlocation/%s
230  and use the configure option:
231  --download-%s=/yourselectedlocation/%s
232''' % (self.packagename.upper(), url, filename, self.packagename, filename)
233      import tarfile
234      try:
235        tf  = tarfile.open(os.path.join(root, localFile))
236      except tarfile.ReadError as e:
237        raise RuntimeError(str(e)+'\n'+failureMessage)
238      if not tf: raise RuntimeError(failureMessage)
239      #git puts 'pax_global_header' as the first entry and some tar utils process this as a file
240      firstname = tf.getnames()[0]
241      if firstname == 'pax_global_header':
242        firstmember = tf.getmembers()[1]
243      else:
244        firstmember = tf.getmembers()[0]
245      # some tarfiles list packagename/ but some list packagename/filename in the first entry
246      if firstmember.isdir():
247        dirname = firstmember.name
248      else:
249        dirname = os.path.dirname(firstmember.name)
250      tf.extractall(root)
251      tf.close()
252
253    # fix file permissions for the untared tarballs.
254    try:
255      # check if 'dirname' is set'
256      if dirname:
257        config.base.Configure.executeShellCommand('cd '+root+'; chmod -R a+r '+dirname+';find  '+dirname + ' -type d -name "*" -exec chmod a+rx {} \;', log = self.log)
258      else:
259        self.logPrintBox('WARNING: Could not determine dirname extracted by '+localFile+' to fix file permissions')
260    except RuntimeError as e:
261      raise RuntimeError('Error changing permissions for '+dirname+' obtained from '+localFile+ ' : '+str(e))
262    os.unlink(localFile)
263